Spaces:

Nexari-Research
/

Nexari-G1.1

Running

App Files Community

Nexari-Research committed 5 days ago

Commit

a3fb9fb

verified ·

1 Parent(s): 9d13a39

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -49

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py -- FINAL: Forced Thought Structure & Robust Streaming
 import os
 import json
 import logging
@@ -30,8 +30,23 @@ def ensure_model_dir_or_fail():
 @app.on_event("startup")
 async def startup_event():
     """Startup hook: run the model-directory check, then log completion.

     Anything raised by ``ensure_model_dir_or_fail()`` propagates to the
     caller of this hook; nothing is caught here.
     """
     ensure_model_dir_or_fail()
     # Placeholder: path initialisation is not implemented in this version.
     logger.info("Startup complete.")
 class Message(BaseModel):
@@ -41,16 +56,41 @@ class Message(BaseModel):
 class ChatRequest(BaseModel):
     # Request body accepted by the chat completion endpoint.
     # Conversation messages, as supplied by the client.
     messages: list[Message]
     # Streaming flag; on by default.
     stream: bool = True
     # Slightly lower temp for better following instructions
     temperature: float = 0.6
 def get_intent(last_user_message: str):
     """Guess the request category from keywords in the message.

     Matching is a substring test on the lower-cased text. Coding keywords
     are checked before reasoning keywords; when nothing matches (including
     ``None`` or an empty message) the result is ``"chat"``.

     Returns:
         One of ``"coding"``, ``"reasoning"`` or ``"chat"``.
     """
     lowered = (last_user_message or "").lower()
     # Order matters: the first category with a matching keyword wins.
     keyword_table = (
         ("coding", ("code", "python", "script", "function", "html", "css", "fix")),
         ("reasoning", ("why", "how", "reason", "analyze", "think")),
     )
     for label, keywords in keyword_table:
         for keyword in keywords:
             if keyword in lowered:
                 return label
     return "chat"
 def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
     if isinstance(chunk, dict):
@@ -61,74 +101,107 @@ def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
                 out[k] = v
             else:
                 try:
-                    out[k] = json.dumps(v)
-                except:
                     out[k] = str(v)
         return out
-    return {"text": str(chunk)}
-# === SYSTEM PROMPTS WITH STRICT PLANNING ===
-SYSTEM_IDENTITY = "You are Nexari-G1, created by Piyush."
-# With this instruction, the model will always think first
-STRICT_THOUGHT_PROMPT = (
-    "\n[IMPORTANT: REASONING REQUIRED]\n"
-    "1. You MUST start your response with a thinking block using <think>...</think> tags.\n"
-    "2. Inside <think>, plan your answer, analyze the user's request, and check for errors.\n"
-    "3. After </think>, provide the final polite response to the user.\n"
-    "4. Do NOT output the thought block if the user asks for a simple greeting, but still use empty tags <think></think> to maintain format."
 )
 @app.post("/v1/chat/completions")
 async def chat_endpoint(request: ChatRequest):
     """Pick a model for the request and stream its reply as Server-Sent Events.

     The content of the last message is classified with ``get_intent()``.
     A "coding" intent uses the coder model when it is loaded; every other
     case uses the chat model. The strict-thought instructions are injected
     as (or appended to) the leading system message before generation.

     Returns:
         A ``StreamingResponse`` (``text/event-stream``) that emits one
         ``event: status`` frame, one ``data:`` frame per sanitized chunk
         and a final ``data: [DONE]``; or a plain ``{"error": ...}`` dict
         when there are no messages or no usable model is loaded.
     """
     messages = [m.dict() for m in request.messages] if request.messages else []
     if not messages:
         return {"error": "No messages"}
     last_msg = messages[-1]['content']
     intent = get_intent(last_msg)
     # Prefer the coder model for coding requests, but only if it is loaded.
     coder = getattr(coder_model, "model", None)
     if intent == "coding" and coder:
         selected_model = coder
         status_msg = "Planning Code..."
     else:
         # getattr guards against the chat model not being loaded yet, which
         # a bare attribute read would turn into an error mid-request.
         selected_model = getattr(chat_model, "model", None)
         status_msg = "Deep Researching..." if intent == "reasoning" else "Thinking..."
     if not selected_model:
         return {"error": "Model not available."}
     # Inject System Prompt
     full_system_prompt = SYSTEM_IDENTITY + STRICT_THOUGHT_PROMPT
     if messages[0].get("role") != "system":
         messages.insert(0, {"role": "system", "content": full_system_prompt})
     else:
         # Tolerate a system message whose content is missing or None.
         messages[0]["content"] = (messages[0].get("content") or "") + STRICT_THOUGHT_PROMPT
     async def iter_response():
         try:
             # 1. Send Status
             yield f"event: status\ndata: {json.dumps({'status': status_msg})}\n\n"
             # 2. Generate Stream
             stream = selected_model.create_chat_completion(
                 messages=messages,
                 temperature=request.temperature,
                 stream=True
             )
             for chunk in stream:
                 safe_chunk = sanitize_chunk(chunk)
                 yield f"data: {json.dumps(safe_chunk)}\n\n"
                 # Give other tasks a turn between chunks.
                 await asyncio.sleep(0)
             yield "data: [DONE]\n\n"
         except Exception as e:
             # logger.exception keeps the traceback, which a plain error log drops.
             logger.exception("Stream Error: %s", e)
             yield f"data: {json.dumps({'error': str(e)})}\n\n"
     return StreamingResponse(iter_response(), media_type="text/event-stream")

+# app.py -- Final: Forced Thinking/Planning logic via System Prompt
 import os
 import json
 import logging
 @app.on_event("startup")
 async def startup_event():
     """Startup hook: set per-model base directories and load all models.

     Runs the model-directory check, points the router, coder and chat
     model modules at their sub-directories of ``MODEL_DIR``, then awaits
     the three ``load_model_async()`` calls together. A loader failure is
     logged and does not abort startup.
     """
     logger.info("Startup: ensure model dir and set base dirs...")
     ensure_model_dir_or_fail()
     router_model.BASE_DIR = os.path.join(MODEL_DIR, "router")
     coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
     chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
     # Load models concurrently; gather() schedules the coroutines itself.
     results = await asyncio.gather(
         router_model.load_model_async(),
         coder_model.load_model_async(),
         chat_model.load_model_async(),
         return_exceptions=True,
     )
     for i, r in enumerate(results):
         # BaseException rather than Exception: a cancelled loader is
         # returned as CancelledError, which is not an Exception subclass.
         if isinstance(r, BaseException):
             # exc_info attaches the loader's traceback to the log record.
             logger.error("Model loader %d failed: %s", i, r, exc_info=r)
     logger.info("Startup complete.")
 class Message(BaseModel):
 class ChatRequest(BaseModel):
     # Request body accepted by the chat completion endpoint.
     # Conversation messages, as supplied by the client.
     messages: list[Message]
     # Streaming flag; on by default.
     stream: bool = True
     # Sampling temperature forwarded to the model call.
     temperature: float = 0.7
 def get_intent(last_user_message: str):
     """Classify the latest user message as an ``(intent, sentiment)`` pair.

     Without a loaded router model, intent comes from substring keyword
     matching on the lower-cased message and sentiment is always
     ``"neutral"``. With a router model, the model is asked to label the
     message and its lower-cased reply is scanned for "coding", "reasoning"
     and "sad", in that order. Any router failure is logged and yields
     ``("chat", "neutral")``.

     Returns:
         ``(intent, sentiment)`` where intent is "coding", "reasoning" or
         "chat" and sentiment is "neutral" or "sad".
     """
     router = getattr(router_model, "model", None)
     if not router:
         # Simple keyword based routing fallback
         lowered = (last_user_message or "").lower()
         fallback_rules = (
             ("coding", ("code", "bug", "fix", "error", "function", "python", "js", "html", "css")),
             ("reasoning", ("why", "how", "prove", "reason", "think", "analyze")),
         )
         for label, keywords in fallback_rules:
             if any(keyword in lowered for keyword in keywords):
                 return label, "neutral"
         return "chat", "neutral"
     # Router model logic (if available)
     sys_prompt = "Analyze intent. Return JSON like {'intent':'coding'|'chat'|'reasoning', 'sentiment':'neutral'|'sad'}"
     # The reply is read from the chat-style field first, then the text field.
     readers = (
         lambda r: r['choices'][0]['message']['content'],
         lambda r: r['choices'][0]['text'],
     )
     # Checked in priority order; the first label found in the reply wins.
     verdicts = (
         ("coding", ("coding", "neutral")),
         ("reasoning", ("reasoning", "neutral")),
         ("sad", ("chat", "sad")),
     )
     try:
         res = router.create_chat_completion(
             messages=[{"role":"system","content":sys_prompt},{"role":"user","content": last_user_message}],
             temperature=0.1, max_tokens=50
         )
         content = ""
         for read in readers:
             try:
                 content = read(res).lower()
                 break
             except Exception:
                 continue
         for needle, verdict in verdicts:
             if needle in content:
                 return verdict
         return "chat", "neutral"
     except Exception as e:
         logger.exception("Router failure: %s", e)
         return "chat", "neutral"
 def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
     if isinstance(chunk, dict):
                 out[k] = v
             else:
                 try:
+                    json.dumps(v)
+                    out[k] = v
+                except Exception:
                     out[k] = str(v)
         return out
+    else:
+        try:
+            txt = str(chunk)
+            return {"text": txt}
+        except Exception:
+            return {"text": ""}
+# === CORE IDENTITY & THOUGHT INSTRUCTIONS ===
+SYSTEM_IDENTITY_PREFIX = (
+    "You are Nexari-G1, created by Piyush. "
+    "Your name is Nexari-G1. "
+)
+# === Thought instruction: tells the model to wrap its reasoning in <think> tags ===
+THOUGHT_INSTRUCTION = (
+    "\n\nIMPORTANT INSTRUCTION: "
+    "Before generating your final answer, you must perform a 'Thought Process'. "
+    "You must enclose your planning, reasoning, and analysis inside <think> and </think> tags. "
+    "Step 1: Analyze the user's request inside <think>...</think>. "
+    "Step 2: Provide the final response after the </think> tag. "
+    "Example format: <think>User wants X. I should verify Y first.</think> Here is the answer for X..."
 )
 @app.post("/v1/chat/completions")
 async def chat_endpoint(request: ChatRequest):
     """Route a chat request to a model and stream the reply as Server-Sent Events.

     The content of the last message is classified with ``get_intent()``.
     The result selects the model (coder model for "coding", chat model
     otherwise), the extra system instructions, and the status text sent to
     the client before generation starts.

     Returns:
         A ``StreamingResponse`` (``text/event-stream``) that emits one
         ``event: status`` frame, an SSE comment frame, one ``data:`` frame
         per sanitized model chunk and a final ``data: [DONE]``; or a plain
         ``{"error": ...}`` dict when there are no messages or the required
         model is not loaded. ``request.stream`` is not consulted here.
     """
     messages = [m.dict() for m in request.messages] if request.messages else []
     if not messages:
         return {"error": "No messages provided."}
     last = messages[-1]['content']
     intent, sentiment = get_intent(last)
     # Base system message setup with Thought Instruction
     sys_msg = SYSTEM_IDENTITY_PREFIX + THOUGHT_INSTRUCTION
     if intent == "coding":
         if not getattr(coder_model, "model", None):
             return {"error":"Coder model not available."}
         selected_model = coder_model.model
         sys_msg += "\nYou are an expert Coding Assistant. Plan your code logic in the thought block."
         status_indicator = "Planning Code..."
     elif intent == "reasoning":
         if not getattr(chat_model, "model", None):
             return {"error":"Model not available."}
         selected_model = chat_model.model
         sys_msg += "\nYou are a reasoning engine. Break down complex logic in the thought block first."
         status_indicator = "Deep Thinking..."
     else: # Chat
         if not getattr(chat_model, "model", None):
             return {"error":"Chat model not available."}
         selected_model = chat_model.model
         if sentiment == "sad":
             sys_msg += "\nBe empathic and supportive."
             status_indicator = "Empathizing..."
         else:
             sys_msg += "\nYou are a helpful assistant."
             status_indicator = "Thinking..."
     # Inject System Prompt
     if messages[0].get("role") != "system":
         messages.insert(0, {"role":"system","content": sys_msg})
     else:
         # Keep the caller's system prompt and append ours after it, so the
         # caller's instructions are not silently discarded.
         existing = messages[0].get("content") or ""
         messages[0]["content"] = f"{existing}\n\n{sys_msg}" if existing else sys_msg
     def iter_response():
         try:
             # 1. Send Status Event (Triggers Dynamic Indicator text update)
             status_payload = json.dumps({"status": status_indicator})
             yield f"event: status\ndata: {status_payload}\n\n"
             # A line starting with ':' is an SSE comment frame.
             yield ":\n\n"
             # 2. Stream Model Response
             stream = selected_model.create_chat_completion(
                 messages=messages,
                 temperature=request.temperature,
                 stream=True
             )
             for chunk in stream:
                 safe = sanitize_chunk(chunk)
                 # Ensure we send properly formatted SSE data
                 yield f"data: {json.dumps(safe)}\n\n"
             # 3. Done Marker
             yield "data: [DONE]\n\n"
         except Exception as e:
             logger.exception("Streaming error: %s", e)
             yield f"data: {json.dumps({'error': str(e)})}\n\n"
             # Still send the terminator after the error frame.
             yield "data: [DONE]\n\n"
     return StreamingResponse(iter_response(), media_type="text/event-stream")