Spaces:

Nexari-Research
/

Nexari-G1.1

Running

App Files Files Community

Nexari-Research committed 6 days ago

Commit

af8ad70

verified ·

1 Parent(s): 0aff05c

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -135

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py — Optimized for Speed + Original System Instructions Restored
 import os
 import json
 import logging
@@ -8,7 +8,10 @@ from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 from typing import Any, Dict, List
-# Local model modules (expect these to exist in your project)
 import router_model
 import coder_model
 import chat_model
@@ -19,14 +22,33 @@ logger = logging.getLogger("nexari.app")
 app = FastAPI()
 MODEL_DIR = "./models"
-# Limit history to keep CPU processing fast (System + 6 recent messages)
-# This way the response time will not increase no matter how long the chat gets.
 MAX_HISTORY_MESSAGES = 6
 def ensure_model_dir_or_fail():
     try:
         os.makedirs(MODEL_DIR, exist_ok=True)
-        logger.info("Model directory ensured: %s", MODEL_DIR)
     except Exception as e:
         logger.critical("Unable to create model dir: %s", e)
         raise
@@ -40,15 +62,26 @@ async def startup_event():
     coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
     chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
     tasks = [
         asyncio.create_task(router_model.load_model_async()),
         asyncio.create_task(coder_model.load_model_async()),
         asyncio.create_task(chat_model.load_model_async()),
     ]
-    results = await asyncio.gather(*tasks, return_exceptions=True)
-    for i, r in enumerate(results):
-        if isinstance(r, Exception):
-            logger.error("Model loader %d failed: %s", i, r)
     logger.info("Startup complete.")
 class Message(BaseModel):
@@ -60,19 +93,61 @@ class ChatRequest(BaseModel):
     stream: bool = True
     temperature: float = 0.7
 def get_intent(last_user_message: str):
-    # Optimization: Short messages skip the router to save 2-3 seconds
-    if len(last_user_message) < 15:
         return "chat", "neutral"
-    # If router model missing, use a simple rule
     if not getattr(router_model, "model", None):
-        text = (last_user_message or "").lower()
-        if any(tok in text for tok in ["code", "bug", "fix", "error", "function", "python", "js", "html", "css"]):
-            return "coding", "neutral"
-        if any(tok in text for tok in ["why", "how", "prove", "reason", "think"]):
-            return "reasoning", "neutral"
-        return "chat", "neutral"
     sys_prompt = "Analyze intent. Return JSON like {'intent':'coding'|'chat'|'reasoning', 'sentiment':'neutral'|'sad'}"
     try:
@@ -80,54 +155,25 @@ def get_intent(last_user_message: str):
             messages=[{"role":"system","content":sys_prompt},{"role":"user","content": last_user_message}],
             temperature=0.1, max_tokens=50
         )
-        content = ""
-        try:
-            content = res['choices'][0]['message']['content'].lower()
-        except Exception:
-            try:
-                content = res['choices'][0]['text'].lower()
-            except Exception:
-                content = ""
-        if "coding" in content:
-            return "coding", "neutral"
-        if "reasoning" in content or "think" in content or "solve" in content:
-            return "reasoning", "neutral"
-        if "sad" in content:
-            return "chat", "sad"
         return "chat", "neutral"
-    except Exception as e:
-        logger.exception("Router failure: %s", e)
         return "chat", "neutral"
 def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
-    """
-    Ensure chunk is a JSON-serializable mapping for SSE.
-    Remove any 'status' fields so we never send an unintended status overwrite.
-    """
     if isinstance(chunk, dict):
         out = {}
         for k, v in chunk.items():
-            if k == "status":
-                logger.debug("Dropping status field from model chunk: %s", v)
-                continue
-            if isinstance(v, (str, int, float, bool, type(None))):
-                out[k] = v
-            else:
-                try:
-                    json.dumps(v)
-                    out[k] = v
-                except Exception:
-                    out[k] = str(v)
         return out
-    else:
-        try:
-            txt = str(chunk)
-            return {"text": txt}
-        except Exception:
-            return {"text": "[UNSERIALIZABLE_CHUNK]"}
-# === RESTORED ORIGINAL SYSTEM INSTRUCTIONS ===
-# Static system identity prefix to include in system prompts:
 SYSTEM_IDENTITY_PREFIX = (
     "You are Nexari-G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
     "always understand the user behaviour and request. "
@@ -137,73 +183,49 @@ SYSTEM_IDENTITY_PREFIX = (
 )
 def limit_context(messages: List[Dict]) -> List[Dict]:
-    """
-    Smart Context Trimming (Speed Fix):
-    1. Always keep the SYSTEM prompt (Identity/Instructions).
-    2. Keep only the last MAX_HISTORY_MESSAGES.
-    This ensures processing time stays fast even after 100 turns.
-    """
-    if not messages:
-        return []
-    system_msg = None
-    chat_history = []
-    # Separate system message
-    if messages[0].get("role") == "system":
-        system_msg = messages[0]
-        remaining = messages[1:]
-    else:
-        remaining = messages
-    # Keep only the last N messages
     if len(remaining) > MAX_HISTORY_MESSAGES:
-        chat_history = remaining[-MAX_HISTORY_MESSAGES:]
-    else:
-        chat_history = remaining
-    # Reconstruct
     final_msgs = []
-    if system_msg:
-        final_msgs.append(system_msg)
-    final_msgs.extend(chat_history)
     return final_msgs
 @app.post("/v1/chat/completions")
 async def chat_endpoint(request: ChatRequest):
-    # Validate incoming
     raw_messages = [m.dict() for m in request.messages] if request.messages else []
-    if not raw_messages:
-        return {"error": "No messages provided."}
     last = raw_messages[-1]['content']
     intent, sentiment = get_intent(last)
     selected_model = None
-    # base system message will always include identity prefix
     sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a helpful assistant."
-    status_indicator = "Thinking..."  # default if not changed below
     if intent == "coding":
         if not getattr(coder_model, "model", None):
-            logger.error("Coder model not available.")
             return {"error":"Coder model not available."}
         selected_model = coder_model.model
-        sys_msg = SYSTEM_IDENTITY_PREFIX + "You are an expert Coding Assistant. Write clean, efficient code with comments where helpful."
-        status_indicator = "Coding..."
-        logger.info("Intent: CODING")
     elif intent == "reasoning":
         if not getattr(chat_model, "model", None):
-            logger.error("Chat model not available for reasoning.")
             return {"error":"Model not available."}
         selected_model = chat_model.model
-        sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a reasoning-focused assistant. Walk through your thinking clearly and show steps if relevant."
-        status_indicator = "Reasoning..."
-        logger.info("Intent: REASONING")
     else:
         if not getattr(chat_model, "model", None):
-            logger.error("Chat model missing.")
             return {"error":"Chat model not available."}
         selected_model = chat_model.model
         logger.info("Intent: CHAT (%s)", sentiment)
@@ -211,60 +233,30 @@ async def chat_endpoint(request: ChatRequest):
             sys_msg = SYSTEM_IDENTITY_PREFIX + "You are empathic and calm. Provide supportive, concise responses."
             status_indicator = "Empathizing..."
         else:
-            # default chat system message with identity included
             sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a helpful conversational assistant."
-    # ensure system prompt is present (first message)
     if raw_messages[0].get("role") != "system":
         raw_messages.insert(0, {"role":"system","content": sys_msg})
     else:
-        # replace existing system content to ensure identity is present and consistent
         raw_messages[0]["content"] = sys_msg
-    # === APPLY OPTIMIZATION: TRIM CONTEXT ===
-    # This line was added so the model processes only the relevant history and stays fast.
     optimized_messages = limit_context(raw_messages)
-    # Streaming generator
     def iter_response():
         try:
-            # 1) Send a single authoritative SSE status event (event: status)
-            status_payload = json.dumps({"status": status_indicator})
-            event_payload = f"event: status\n"
-            event_payload += f"data: {status_payload}\n\n"
-            logger.info("Sending authoritative status event: %s", status_indicator)
-            yield event_payload
-            # 2) small flush hint
             yield ":\n\n"
-            # 3) Start streaming model output
             stream = selected_model.create_chat_completion(
-                messages=optimized_messages, # USING OPTIMIZED MESSAGES
                 temperature=request.temperature,
                 stream=True
             )
-            # Iterate model generator and sanitize every chunk
             for chunk in stream:
-                safe = sanitize_chunk(chunk)
-                try:
-                    yield f"data: {json.dumps(safe)}\n\n"
-                except Exception:
-                    # fallback to a safe string representation
-                    yield f"data: {json.dumps({'text': str(safe)})}\n\n"
-            # 4) final done marker
             yield "data: [DONE]\n\n"
-            logger.info("Stream finished for request (status was: %s)", status_indicator)
         except Exception as e:
-            logger.exception("Streaming error: %s", e)
-            # send explicit error object
-            try:
-                yield f"data: {json.dumps({'error': str(e)})}\n\n"
-            except Exception:
-                yield "data: {\"error\":\"streaming failure\"}\n\n"
-            yield "data: [DONE]\n\n"
     return StreamingResponse(iter_response(), media_type="text/event-stream")

+# app.py — Hybrid Neural Router + Optimized Performance + Original Identity
 import os
 import json
 import logging
 from pydantic import BaseModel
 from typing import Any, Dict, List
+# New Neural Network Library
+from sentence_transformers import SentenceTransformer, util
+# Local model modules
 import router_model
 import coder_model
 import chat_model
 app = FastAPI()
 MODEL_DIR = "./models"
 MAX_HISTORY_MESSAGES = 6
+# === NEURAL NETWORK CONFIGURATION ===
+# Using a lightweight, high-speed embedding model (State of the Art for speed/accuracy)
+NEURAL_MODEL_NAME = "all-MiniLM-L6-v2"
+neural_classifier = None
+# "Anchors" define the center of gravity for each intent in the Neural Space
+INTENT_ANCHORS = {
+    "coding": [
+        "write python code", "fix this bug", "create a function", "html css script",
+        "debug this error", "generate java code", "react component", "sql query"
+    ],
+    "reasoning": [
+        "solve this math problem", "explain the logic", "why does this happen",
+        "prove that", "step by step reasoning", "analyze this complex topic"
+    ],
+    "sad": [
+        "i am feeling sad", "i am depressed", "life is hard", "i am lonely",
+        "i feel like crying", "everything is going wrong"
+    ]
+}
+encoded_anchors = {}
 def ensure_model_dir_or_fail():
     try:
         os.makedirs(MODEL_DIR, exist_ok=True)
     except Exception as e:
         logger.critical("Unable to create model dir: %s", e)
         raise
     coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
     chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
+    # Load LLMs asynchronously
     tasks = [
         asyncio.create_task(router_model.load_model_async()),
         asyncio.create_task(coder_model.load_model_async()),
         asyncio.create_task(chat_model.load_model_async()),
     ]
+    # Load Neural Network Classifier (Runs on CPU, very fast)
+    global neural_classifier, encoded_anchors
+    try:
+        logger.info("Loading Neural Intent Classifier...")
+        neural_classifier = SentenceTransformer(NEURAL_MODEL_NAME)
+        # Pre-calculate anchor vectors so we don't do it every request (Optimization)
+        for intent, texts in INTENT_ANCHORS.items():
+            encoded_anchors[intent] = neural_classifier.encode(texts, convert_to_tensor=True)
+        logger.info("Neural Intent Classifier Ready.")
+    except Exception as e:
+        logger.error(f"Failed to load Neural Classifier: {e}")
+    await asyncio.gather(*tasks, return_exceptions=True)
     logger.info("Startup complete.")
 class Message(BaseModel):
     stream: bool = True
     temperature: float = 0.7
+def get_intent_neural(text: str):
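+    """
+    Detect intent by cosine similarity between the message embedding and the
+    pre-encoded anchor phrases of each intent.
+    Returns (intent, sentiment), or (None, None) when the classifier is not
+    loaded or the check raises, so the caller can fall back to the router.
+    """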
+    if not neural_classifier:
+        return None, None
+    try:
+        # 1. Convert user text to Vector
+        user_embedding = neural_classifier.encode(text, convert_to_tensor=True)
+        scores = {}
+        # 2. Compare against all Anchor Vectors (Cosine Similarity)
+        for intent, anchor_embeddings in encoded_anchors.items():
+            # Find max similarity with any anchor phrase in this category
+            cosine_scores = util.cos_sim(user_embedding, anchor_embeddings)
+            best_score = float(cosine_scores.max())
+            scores[intent] = best_score
+        # Find the winner
+        best_intent = max(scores, key=scores.get)
+        confidence = scores[best_intent]
+        logger.info(f"Neural Intent Analysis: {scores} -> Winner: {best_intent}")
+        # Threshold check: If confidence is low (< 0.35), treat as general chat
+        if confidence < 0.35:
+            return "chat", "neutral"
+        if best_intent == "coding": return "coding", "neutral"
+        if best_intent == "reasoning": return "reasoning", "neutral"
+        if best_intent == "sad": return "chat", "sad"
+        return "chat", "neutral"
+    except Exception as e:
+        logger.error(f"Neural Check Failed: {e}")
+        return None, None
 def get_intent(last_user_message: str):
+    # 1. Ultra-Fast Length Check (Legacy)
+    # Short circuit: messages shorter than 5 characters go straight to chat
+    if len(last_user_message) < 5:
         return "chat", "neutral"
+    # 2. NEURAL NETWORK CHECK (The Upgrade)
+    # This understands meaning, not just keywords.
+    neural_intent, neural_sentiment = get_intent_neural(last_user_message)
+    if neural_intent:
+        return neural_intent, neural_sentiment
+    # 3. Fallback to Generative Router (only if the neural classifier is unavailable or raised)
+    # A low-confidence neural result is already returned above as ("chat", "neutral"), so it never reaches this point
     if not getattr(router_model, "model", None):
+         return "chat", "neutral"
     sys_prompt = "Analyze intent. Return JSON like {'intent':'coding'|'chat'|'reasoning', 'sentiment':'neutral'|'sad'}"
     try:
             messages=[{"role":"system","content":sys_prompt},{"role":"user","content": last_user_message}],
             temperature=0.1, max_tokens=50
         )
+        content = res['choices'][0]['message']['content'].lower()
+        if "coding" in content: return "coding", "neutral"
+        if "reasoning" in content: return "reasoning", "neutral"
+        if "sad" in content: return "chat", "sad"
         return "chat", "neutral"
+    except Exception:
         return "chat", "neutral"
 def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
     if isinstance(chunk, dict):
         out = {}
         for k, v in chunk.items():
+            if k == "status": continue
+            out[k] = v
         return out
+    return {"text": str(chunk)}
+# === ORIGINAL SYSTEM INSTRUCTIONS ===
 SYSTEM_IDENTITY_PREFIX = (
     "You are Nexari-G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
     "always understand the user behaviour and request. "
 )
 def limit_context(messages: List[Dict]) -> List[Dict]:
+    if not messages: return []
+    system_msg = messages[0] if messages[0].get("role") == "system" else None
+    start_idx = 1 if system_msg else 0
+    remaining = messages[start_idx:]
+    # Keep only the most recent MAX_HISTORY_MESSAGES non-system messages
     if len(remaining) > MAX_HISTORY_MESSAGES:
+        remaining = remaining[-MAX_HISTORY_MESSAGES:]
     final_msgs = []
+    if system_msg: final_msgs.append(system_msg)
+    final_msgs.extend(remaining)
     return final_msgs
 @app.post("/v1/chat/completions")
 async def chat_endpoint(request: ChatRequest):
     raw_messages = [m.dict() for m in request.messages] if request.messages else []
+    if not raw_messages: return {"error": "No messages provided."}
     last = raw_messages[-1]['content']
+    # Get Intent using the new Neural Pipeline
     intent, sentiment = get_intent(last)
     selected_model = None
     sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a helpful assistant."
+    status_indicator = "Thinking..."
     if intent == "coding":
         if not getattr(coder_model, "model", None):
             return {"error":"Coder model not available."}
         selected_model = coder_model.model
+        sys_msg = SYSTEM_IDENTITY_PREFIX + "You are an expert Coding Assistant. Write clean, efficient code with comments."
+        status_indicator = "Coding (Neural Mode)..."
+        logger.info("Intent: CODING (Neural)")
     elif intent == "reasoning":
         if not getattr(chat_model, "model", None):
             return {"error":"Model not available."}
         selected_model = chat_model.model
+        sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a reasoning-focused assistant. Walk through your thinking clearly."
+        status_indicator = "Reasoning (Neural Mode)..."
+        logger.info("Intent: REASONING (Neural)")
     else:
         if not getattr(chat_model, "model", None):
             return {"error":"Chat model not available."}
         selected_model = chat_model.model
         logger.info("Intent: CHAT (%s)", sentiment)
             sys_msg = SYSTEM_IDENTITY_PREFIX + "You are empathic and calm. Provide supportive, concise responses."
             status_indicator = "Empathizing..."
         else:
             sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a helpful conversational assistant."
     if raw_messages[0].get("role") != "system":
         raw_messages.insert(0, {"role":"system","content": sys_msg})
     else:
         raw_messages[0]["content"] = sys_msg
     optimized_messages = limit_context(raw_messages)
     def iter_response():
         try:
+            yield f"event: status\ndata: {json.dumps({'status': status_indicator})}\n\n"
             yield ":\n\n"
             stream = selected_model.create_chat_completion(
+                messages=optimized_messages,
                 temperature=request.temperature,
                 stream=True
             )
             for chunk in stream:
+                yield f"data: {json.dumps(sanitize_chunk(chunk))}\n\n"
             yield "data: [DONE]\n\n"
         except Exception as e:
+            logger.exception("Stream Error: %s", e)
+            yield f"data: {json.dumps({'error': str(e)})}\n\n"
     return StreamingResponse(iter_response(), media_type="text/event-stream")