Nexari-Research committed on
Commit 941017d · verified · 1 Parent(s): 0c5b849

Update app.py

Files changed (1):
  app.py +202 -443
app.py CHANGED
@@ -1,27 +1,27 @@
- # app.py — Nexari G1 (Tool-aware + Two-pass protocol + Confidence + Self-learning)
- # Based on your codebase; preserves existing instructions and behavior.
-
  import os
  import json
  import logging
  import asyncio
  from datetime import datetime
- import pytz
- from fastapi import FastAPI, Request
- from fastapi.responses import StreamingResponse, JSONResponse
  from pydantic import BaseModel
- from typing import Any, Dict, List, Optional

  import coder_model
  import chat_model

- # Optional libs
  try:
      from sentence_transformers import SentenceTransformer, util
      from duckduckgo_search import DDGS
      NEURAL_AVAILABLE = True
- except Exception:
      NEURAL_AVAILABLE = False

  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger("nexari.app")
@@ -30,513 +30,272 @@ app = FastAPI()

  MODEL_DIR = "./models"
  NEURAL_DIR = os.path.join(MODEL_DIR, "neural")
- MEMORY_PATH = os.path.join(MODEL_DIR, "decision_memory.jsonl")

  NEURAL_MODEL_NAME = "all-MiniLM-L6-v2"
- neural_model = None
- anchor_vectors: Dict[str, Any] = {}
-
- MAX_HISTORY_MESSAGES = 6

  INTENT_ANCHORS = {
-     "coding": [
-         "write code", "fix bug", "error in code", "update file",
-         "full code", "optimize function", "refactor", "api backend"
-     ],
-     "reasoning": [
-         "explain", "why", "how does", "step by step",
-         "deep analysis", "logic behind", "compare"
-     ],
-     "search": [
-         "latest", "current", "today update",
-         "who is now", "recent news", "real time"
-     ],
-     "time": ["current time", "date today"],
-     "identity": ["who are you", "your name", "who created you"]
  }

- COMMAND_WORDS = {"give", "write", "fix", "update", "create", "build"}
- LEARNING_WORDS = {"explain", "why", "how", "meaning", "reason"}
- INFO_WORDS = {"latest", "current", "today", "now", "price", "news"}

- memory_lock = asyncio.Lock()

- # ---------------- Startup ----------------

- def ensure_dirs():
-     os.makedirs(MODEL_DIR, exist_ok=True)
-     os.makedirs(NEURAL_DIR, exist_ok=True)

- def load_neural():
-     global neural_model, anchor_vectors
-     if not NEURAL_AVAILABLE:
-         logger.warning("Neural libs not available.")
-         return
-     model = SentenceTransformer(NEURAL_MODEL_NAME, cache_folder=NEURAL_DIR, device="cpu")
-     neural_model = model
-     anchor_vectors.clear()
-     for k, texts in INTENT_ANCHORS.items():
-         anchor_vectors[k] = model.encode(texts, convert_to_tensor=True)
-     logger.info("🧠 Neural decision engine loaded")

  @app.on_event("startup")
- async def startup():
-     ensure_dirs()
      coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
      chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
-     await asyncio.gather(
-         coder_model.load_model_async(),
-         chat_model.load_model_async(),
-         asyncio.to_thread(load_neural),
-         return_exceptions=True
-     )
-     if not os.path.exists(MEMORY_PATH):
-         open(MEMORY_PATH, "a").close()
-     logger.info("🚀 Nexari G1 fully online")
-
- # ---------------- Schemas ----------------

  class Message(BaseModel):
      role: str
      content: str

  class ChatRequest(BaseModel):
-     messages: List[Message]
      stream: bool = True
      temperature: float = 0.7

- class FeedbackPayload(BaseModel):
-     text: str
-     correct_intent: str
-     correct_behavior: Optional[str] = None
-
- # ---------------- Tools ----------------

- def get_time():
-     ist = pytz.timezone("Asia/Kolkata")
-     return datetime.now(ist).strftime("%A, %d %B %Y, %I:%M %p IST")

- def search_sync(q: str):
      try:
-         with DDGS() as d:
-             res = list(d.text(q, max_results=4))
-             return "\n".join(
-                 f"{r['title']}: {r['body']} ({r['href']})" for r in res
-             ) if res else None
      except Exception as e:
-         logger.error(f"Search error: {e}")
          return None

- async def web_search(q: str):
-     return await asyncio.to_thread(search_sync, q)
-
- # ---------------- Memory helpers (unchanged) ----------------
-
- async def append_memory(entry: Dict[str, Any]):
-     async with memory_lock:
-         with open(MEMORY_PATH, "a", encoding="utf-8") as f:
-             f.write(json.dumps(entry, ensure_ascii=False) + "\n")
-
- async def load_memory(limit: Optional[int] = None) -> List[Dict[str, Any]]:
-     items = []
-     async with memory_lock:
-         if not os.path.exists(MEMORY_PATH):
-             return []
-         with open(MEMORY_PATH, "r", encoding="utf-8") as f:
-             for i, line in enumerate(f):
-                 if limit and i >= limit:
-                     break
-                 line = line.strip()
-                 if not line:
-                     continue
-                 try:
-                     items.append(json.loads(line))
-                 except Exception:
-                     continue
-     return items
-
- def embedding_from_list(lst):
-     try:
-         import torch
-         return torch.tensor(lst)
-     except Exception:
-         import numpy as np
-         return np.array(lst)
-
- # ---------------- Behavior & Decision Engine (as before) ----------------
-
- def detect_behavior(text: str) -> str:
-     tokens = set(text.lower().split())
-     if tokens & COMMAND_WORDS:
-         return "command"
-     if tokens & LEARNING_WORDS:
-         return "learning"
-     if tokens & INFO_WORDS:
-         return "info"
-     return "neutral"
-
- def memory_vote_adjustment(current_emb, prediction_intent, prediction_behavior, base_conf, top_k = 6):
-     if not NEURAL_AVAILABLE:
-         return None, base_conf
-     mem = asyncio.get_event_loop().run_until_complete(load_memory(limit=1000))
-     if not mem:
-         return None, base_conf
-     sims = []
-     for item in mem:
-         try:
-             stored_emb = embedding_from_list(item.get("embedding", []))
-             score = float(util.cos_sim(current_emb, stored_emb).max()) if hasattr(util, "cos_sim") else 0.0
-             sims.append((score, item))
-         except Exception:
-             continue
-     if not sims:
-         return None, base_conf
-     sims.sort(key=lambda x: x[0], reverse=True)
-     top = sims[:top_k]
-     agree = 0
-     disagree = 0
-     intent_votes = {}
-     for score, item in top:
-         item_intent = item.get("intent")
-         if item_intent == prediction_intent:
-             agree += score
-         else:
-             disagree += score
-         intent_votes.setdefault(item_intent, 0.0)
-         intent_votes[item_intent] += score
-     total = agree + disagree + 1e-9
-     support_ratio = agree / total
-     adjusted_conf = base_conf
-     override = None
-     best_vote_intent = max(intent_votes.items(), key=lambda x: x[1])[0]
-     best_vote_score = intent_votes[best_vote_intent] / (sum(intent_votes.values()) + 1e-9)
-     if support_ratio > 0.6:
-         adjusted_conf = min(1.0, base_conf + 0.12)
-     elif support_ratio < 0.4 and best_vote_intent != prediction_intent and best_vote_score > 0.55:
-         override = best_vote_intent
-         adjusted_conf = max(0.25, base_conf * 0.5)
-     else:
-         adjusted_conf = max(0.02, base_conf * (0.9 + support_ratio * 0.1))
-     return override, adjusted_conf
-
- def analyze_decision(text: str, history: List[str]):
-     tokens = text.lower().split()
-     if "time" in tokens or "date" in tokens:
-         return "time", "neutral", 1.0, {"reason": "hard_override_time"}
-     if "who" in tokens and "you" in tokens:
-         return "identity", "neutral", 1.0, {"reason": "hard_override_identity"}
-     if not NEURAL_AVAILABLE or not neural_model:
-         behavior = detect_behavior(text)
-         return "chat", behavior, 0.05, {"reason": "no_neural_available"}
-     emb = neural_model.encode(text, convert_to_tensor=True)
-     intent_scores = {}
-     for k, v in anchor_vectors.items():
-         try:
-             s = float(util.cos_sim(emb, v).max())
-             intent_scores[k] = s
-         except Exception:
-             intent_scores[k] = 0.0
-     best_intent = max(intent_scores.items(), key=lambda x: x[1])[0]
-     best_score = intent_scores[best_intent]
-     behavior = detect_behavior(text)
-     flow_bonus = 0.0
-     if history:
-         recent = " ".join(history[-2:]).lower()
-         if best_intent == "coding" and any(w in recent for w in COMMAND_WORDS):
-             flow_bonus = 0.12
-         if best_intent == "reasoning" and any(w in recent for w in LEARNING_WORDS):
-             flow_bonus = 0.12
-     base_conf = min(1.0, best_score + flow_bonus)
-     if best_intent == "search" and behavior != "info" and base_conf < 0.50:
-         base_conf = base_conf * 0.6
-         best_intent = "chat"
      try:
-         override_intent, adjusted_conf = memory_vote_adjustment(emb, best_intent, behavior, base_conf)
-         if override_intent:
-             metadata = {
-                 "base_intent": best_intent,
-                 "override_by_memory": override_intent,
-                 "base_conf": base_conf
-             }
-             best_intent = override_intent
-             final_conf = adjusted_conf
-         else:
-             metadata = {"base_intent": best_intent, "base_conf": base_conf, "memory_adjust": False}
-             final_conf = adjusted_conf
      except Exception as e:
-         logger.exception("Memory adjust failed: %s", e)
-         metadata = {"base_intent": best_intent, "base_conf": base_conf, "memory_adjust": "error"}
-         final_conf = base_conf
-     final_conf = max(0.0, min(1.0, final_conf))
-     return best_intent, behavior, final_conf, {**metadata, "intent_scores": intent_scores, "embedding_available": True}

- # ---------------- System prompt + Tool manifest ----------------

  SYSTEM_PREFIX = (
      "You are Nexari G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
-     "Be accurate, friendly, and helpful. "
-     "You have access to server tools which provide real-time data. DO NOT claim direct internet access. "
-     "TOOLS AVAILABLE (server-side):\n"
-     " - get_time(): returns the current server time in IST.\n"
-     " - web_search(query): returns real-time search results (title, snippet, link).\n\n"
-     "USAGE PROTOCOL (CRITICAL):\n"
-     " - If you need the current time, include exactly this token anywhere in your final response: __CALL_GET_TIME__\n"
-     " - If you need a web search, include exactly this token followed by the query on the same line: __CALL_WEBSEARCH__: <your query here>\n"
-     " - Do NOT fabricate the results of searches. If SEARCH results are injected, use ONLY those results when answering.\n"
-     " - If you can answer without calling tools, answer directly. If not sure, you may request a tool using the tokens above.\n\n"
      "ATTENTION PROTOCOL: "
-     "1. Prioritize latest message. "
-     "2. Use history only for context. "
-     "3. Topic change = instant switch. "
-     "4. No repetition unless asked. "
-     "Use emojis where appropriate."
- )
-
- def trim_context(msgs: List[Dict]):
-     sys = msgs[0] if msgs and msgs[0]["role"] == "system" else None
-     body = msgs[1:] if sys else msgs
-     body = body[-MAX_HISTORY_MESSAGES:]
-     return ([sys] if sys else []) + body
-
- def sanitize(chunk: Any):
-     if isinstance(chunk, dict):
-         chunk.pop("status", None)
-         return chunk
-     return {"text": str(chunk)}
-
- # ---------------- Helper: normalize model response to text ----------------

- def extract_text_from_model_response(resp):
-     # robust extraction: handle dict, list, generator-like, or plain string
-     try:
-         # if llama-cpp returns a dict with 'choices' or 'content'
-         if isinstance(resp, dict):
-             # try standard keys
-             if "choices" in resp and isinstance(resp["choices"], list) and resp["choices"]:
-                 ch = resp["choices"][0]
-                 if isinstance(ch, dict) and ("text" in ch or "message" in ch):
-                     return ch.get("text") or (ch.get("message") and ch["message"].get("content")) or str(resp)
-             if "text" in resp:
-                 return resp["text"]
-             if "content" in resp:
-                 return resp["content"]
-             return str(resp)
-         if isinstance(resp, (list, tuple)):
-             return " ".join(map(str, resp))
-         # fallback
-         return str(resp)
-     except Exception:
-         return str(resp)

- # ---------------- Chat endpoint (two-pass tool-aware) ----------------

  @app.post("/v1/chat/completions")
  async def chat_endpoint(request: ChatRequest):
      raw_msgs = [m.dict() for m in request.messages] if request.messages else []
-     if not raw_msgs:
-         return JSONResponse({"error": "Empty messages"}, status_code=400)
-
-     user_text = raw_msgs[-1]["content"]
-     history_texts = [m["content"] for m in raw_msgs[:-1] if m["role"] == "user"]
-
-     intent, behavior, confidence, meta = analyze_decision(user_text, history_texts)
-     logger.info(f"Decision: intent={intent} behavior={behavior} confidence={confidence:.3f} meta_keys={list(meta.keys())}")

      selected_model = chat_model.model
      sys_msg = SYSTEM_PREFIX
      status = "Thinking..."
      injected_context = ""
-     # If model choice via decision
      if intent == "coding" and getattr(coder_model, "model", None):
          selected_model = coder_model.model
          sys_msg += " You are an Expert Coder. Provide clean, working code."
          status = "Coding..."
-     elif intent == "reasoning":
          status = "Reasoning..."
      elif intent == "time":
-         # direct injection of time; no need to call model tools
-         injected_context = f"CURRENT DATE & TIME: {get_time()}"
          status = "Checking Time..."
      elif intent == "search":
          status = "Searching Web..."
-         # perform server search proactively when intent==search
-         res = await web_search(user_text)
          if res:
-             injected_context = f"### SEARCH RESULTS (REAL-TIME DATA):\n{res}\n"
          else:
              injected_context = ""
              status = "Thinking..."

-     # Ensure system prefix is present
      if raw_msgs[0].get("role") != "system":
          raw_msgs.insert(0, {"role":"system","content": sys_msg})
      else:
          raw_msgs[0]["content"] = sys_msg

-     # If we proactively have injected_context (search results or time), attach them
      if injected_context:
-         raw_msgs[-1]['content'] = f"{injected_context}\n\nUSER QUESTION:\n{user_text}"

-     final_msgs = trim_context(raw_msgs)

-     # first pass: run model in non-stream mode to see if model requests a tool
-     try:
-         first_resp = selected_model.create_chat_completion(messages=final_msgs, temperature=request.temperature, stream=False)
-         first_text = extract_text_from_model_response(first_resp)
-     except Exception as e:
-         logger.exception("First-pass model call failed: %s", e)
-         # fallback to streaming directly to avoid blocking
-         first_text = ""
-
-     # Check for tool-request sentinels in first_text
-     tool_requested = None
-     tool_query = None
-     if "__CALL_GET_TIME__" in first_text:
-         tool_requested = "get_time"
-     elif "__CALL_WEBSEARCH__:" in first_text:
-         # extract query after sentinel
-         try:
-             parts = first_text.split("__CALL_WEBSEARCH__:")
-             if len(parts) >= 2:
-                 tool_query = parts[1].strip().splitlines()[0].strip()
-                 if tool_query:
-                     tool_requested = "web_search"
-         except Exception:
-             tool_requested = None
-
-     # If model requested a tool, perform it and re-run model in streaming mode with injected results
-     if tool_requested == "get_time":
-         result = get_time()
-         injected = f"### TOOL_RESULT: GET_TIME\n{result}\n\n---\nNow answer the user's question using the TOOL_RESULT above."
-         final_msgs[-1]['content'] = f"{injected}\n\nUSER QUESTION:\n{user_text}"
-         status = "Using get_time..."
-         # stream final output
-         def stream_with_tool():
-             try:
-                 # send decision metadata event
-                 decision_payload = {
-                     "intent": intent,
-                     "behavior": behavior,
-                     "confidence": round(confidence, 4),
-                     "tools_available": True,
-                     "tool_requested": "get_time",
-                     "meta": meta
-                 }
-                 yield f"event: decision\ndata: {json.dumps(decision_payload)}\n\n"
-                 yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
-                 yield ":\n\n"
-                 stream = selected_model.create_chat_completion(messages=final_msgs, temperature=request.temperature, stream=True)
-                 for chunk in stream:
-                     yield f"data: {json.dumps(sanitize(chunk))}\n\n"
-                 yield "data: [DONE]\n\n"
-             except Exception as e:
-                 logger.exception("Stream with tool error: %s", e)
-                 yield f"data: {json.dumps({'error': str(e)})}\n\n"
-         return StreamingResponse(stream_with_tool(), media_type="text/event-stream")
-
-     if tool_requested == "web_search" and tool_query:
-         # do the search
-         res = await web_search(tool_query)
-         injected = f"### TOOL_RESULT: WEB_SEARCH for query: {tool_query}\n{res or 'NO_RESULTS'}\n\n---\nNow answer the user's question using the TOOL_RESULT above."
-         final_msgs[-1]['content'] = f"{injected}\n\nUSER QUESTION:\n{user_text}"
-         status = "Using web_search..."
-         def stream_with_tool():
-             try:
-                 decision_payload = {
-                     "intent": intent,
-                     "behavior": behavior,
-                     "confidence": round(confidence, 4),
-                     "tools_available": True,
-                     "tool_requested": "web_search",
-                     "tool_query": tool_query,
-                     "meta": meta
-                 }
-                 yield f"event: decision\ndata: {json.dumps(decision_payload)}\n\n"
-                 yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
-                 yield ":\n\n"
-                 stream = selected_model.create_chat_completion(messages=final_msgs, temperature=request.temperature, stream=True)
-                 for chunk in stream:
-                     yield f"data: {json.dumps(sanitize(chunk))}\n\n"
-                 yield "data: [DONE]\n\n"
-             except Exception as e:
-                 logger.exception("Stream with web_search error: %s", e)
-                 yield f"data: {json.dumps({'error': str(e)})}\n\n"
-         return StreamingResponse(stream_with_tool(), media_type="text/event-stream")
-
-     # If no tool requested or first pass returned a complete answer, stream the first_text or stream normal model output
-     # If model returned a non-empty phrase in first_text and user wanted streaming false, send that as single data chunk
-     if first_text and not (tool_requested):
-         def stream_simple():
-             try:
-                 decision_payload = {
-                     "intent": intent,
-                     "behavior": behavior,
-                     "confidence": round(confidence, 4),
-                     "tools_available": NEURAL_AVAILABLE,
-                     "meta": meta
-                 }
-                 yield f"event: decision\ndata: {json.dumps(decision_payload)}\n\n"
-                 yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
-                 yield ":\n\n"
-                 # send the precomputed text as one data chunk
-                 yield f"data: {json.dumps({'text': first_text})}\n\n"
-                 yield "data: [DONE]\n\n"
-             except Exception as e:
-                 logger.exception("stream_simple error: %s", e)
-                 yield f"data: {json.dumps({'error': str(e)})}\n\n"
-         return StreamingResponse(stream_simple(), media_type="text/event-stream")
-
-     # fallback streaming (if first_text empty or model prefers streaming)
-     def stream_fallback():
          try:
-             decision_payload = {
-                 "intent": intent,
-                 "behavior": behavior,
-                 "confidence": round(confidence, 4),
-                 "tools_available": NEURAL_AVAILABLE,
-                 "meta": meta
-             }
-             yield f"event: decision\ndata: {json.dumps(decision_payload)}\n\n"
              yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
              yield ":\n\n"
-             stream = selected_model.create_chat_completion(messages=final_msgs, temperature=request.temperature, stream=True)
              for chunk in stream:
-                 yield f"data: {json.dumps(sanitize(chunk))}\n\n"
              yield "data: [DONE]\n\n"
          except Exception as e:
-             logger.exception("stream_fallback error: %s", e)
              yield f"data: {json.dumps({'error': str(e)})}\n\n"

-     return StreamingResponse(stream_fallback(), media_type="text/event-stream")
-
- # ---------------- Feedback endpoint ----------------
-
- @app.post("/v1/feedback")
- async def feedback_handler(payload: FeedbackPayload):
-     text = payload.text.strip()
-     intent = payload.correct_intent
-     behavior = payload.correct_behavior or detect_behavior(text)
-     if not text or not intent:
-         return JSONResponse({"error": "text and correct_intent required"}, status_code=400)
-     if NEURAL_AVAILABLE and neural_model:
-         try:
-             emb = neural_model.encode(text, convert_to_tensor=False)
-             emb_list = emb.tolist() if hasattr(emb, "tolist") else list(map(float, emb))
-         except Exception:
-             emb_list = []
-     else:
-         emb_list = []
-     entry = {
-         "text": text,
-         "intent": intent,
-         "behavior": behavior,
-         "embedding": emb_list,
-         "ts": datetime.utcnow().isoformat() + "Z"
-     }
-     await append_memory(entry)
-     logger.info("Feedback saved to memory: intent=%s behavior=%s text=%s", intent, behavior, text[:80])
-     return {"status": "ok", "saved": True}
-
- @app.get("/v1/memory/size")
- async def memory_size():
-     mem = await load_memory()
-     return {"memory_entries": len(mem)}
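The removed implementation above centered on a two-pass tool protocol: a first, non-streamed completion was scanned for sentinel tokens (__CALL_GET_TIME__ and __CALL_WEBSEARCH__: <query>), the requested tool ran server-side, and a second, streamed completion answered with the tool result injected. For reference, that sentinel parsing can be distilled into a few lines; the sketch below is standalone, and the example model outputs fed to it are hypothetical, not taken from the commit.

# Standalone distillation of the removed sentinel parsing (sketch; the
# example strings below are hypothetical first-pass completions).

def parse_tool_request(first_text: str):
    """Return (tool_name, query) requested by a first-pass completion, if any."""
    if "__CALL_GET_TIME__" in first_text:
        return "get_time", None
    if "__CALL_WEBSEARCH__:" in first_text:
        # The query is whatever follows the sentinel on the same line.
        tail = first_text.split("__CALL_WEBSEARCH__:", 1)[1]
        query = tail.strip().splitlines()[0].strip()
        if query:
            return "web_search", query
    return None, None

assert parse_tool_request("Sure! __CALL_GET_TIME__") == ("get_time", None)
assert parse_tool_request("__CALL_WEBSEARCH__: gold price today\nThanks!") == ("web_search", "gold price today")
assert parse_tool_request("No tool needed.") == (None, None)

The rewritten file below drops this protocol entirely and instead decides on the server, before the model is called, using anchor-embedding similarity with confidence gating.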
+ # app.py — Nexari G1 (Advanced Intent Analysis & Confidence Gating)

  import os
  import json
  import logging
  import asyncio
  from datetime import datetime
+ import pytz
+ from fastapi import FastAPI
+ from fastapi.responses import StreamingResponse
  from pydantic import BaseModel
+ from typing import Any, Dict, List

+ # Local model modules
  import coder_model
  import chat_model

+ # === SAFE IMPORT FOR NEW LIBRARIES ===
  try:
      from sentence_transformers import SentenceTransformer, util
      from duckduckgo_search import DDGS
      NEURAL_AVAILABLE = True
+ except ImportError:
      NEURAL_AVAILABLE = False
+     print("⚠️ WARNING: sentence-transformers or duckduckgo-search not found.")

  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger("nexari.app")

  MODEL_DIR = "./models"
  NEURAL_DIR = os.path.join(MODEL_DIR, "neural")

+ # === CONFIGURATION ===
  NEURAL_MODEL_NAME = "all-MiniLM-L6-v2"
+ neural_classifier = None
+ encoded_anchors = {}
+ MAX_HISTORY_MESSAGES = 6

+ # Optimized Anchors for better Vector Separation
  INTENT_ANCHORS = {
+     "coding": ["write python code", "fix bug", "create function", "script", "debug", "sql query", "html css", "java code"],
+     "reasoning": ["solve math", "explain logic", "why", "prove that", "analyze", "physics", "chemistry"],
+     "search": ["latest news", "price of gold", "weather today", "who is the ceo", "current stock price", "search google", "find info"],
+     "time": ["what time is it", "current time", "date today", "clock", "day is today"],
+     # New category to pull "Identity" questions away from Search
+     "identity": ["what is your name", "who are you", "who created you", "tell me about yourself", "are you ai"]
  }

+ def ensure_model_dir_or_fail():
+     try:
+         os.makedirs(MODEL_DIR, exist_ok=True)
+         os.makedirs(NEURAL_DIR, exist_ok=True)
+     except Exception as e:
+         logger.critical("Unable to create model dir: %s", e)

+ # === LOADERS ===

+ def load_neural_network():
+     global neural_classifier, encoded_anchors
+     if not NEURAL_AVAILABLE: return

+     try:
+         logger.info("⏳ Loading Neural Intent Model...")
+         model = SentenceTransformer(NEURAL_MODEL_NAME, cache_folder=NEURAL_DIR, device="cpu")
+
+         anchors = {}
+         for intent, texts in INTENT_ANCHORS.items():
+             anchors[intent] = model.encode(texts, convert_to_tensor=True)
+
+         neural_classifier = model
+         encoded_anchors = anchors
+         logger.info("✅ Neural Intent Classifier Ready!")
+     except Exception as e:
+         logger.error(f"❌ Failed to load Neural Network: {e}")

+ async def load_neural_async():
+     await asyncio.to_thread(load_neural_network)

  @app.on_event("startup")
+ async def startup_event():
+     ensure_model_dir_or_fail()
      coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
      chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
+
+     tasks = [
+         asyncio.create_task(coder_model.load_model_async()),
+         asyncio.create_task(chat_model.load_model_async()),
+         asyncio.create_task(load_neural_async()),
+     ]
+     asyncio.gather(*tasks, return_exceptions=True)
+     logger.info("🚀 Server Startup Complete")

  class Message(BaseModel):
      role: str
      content: str

  class ChatRequest(BaseModel):
+     messages: list[Message]
      stream: bool = True
      temperature: float = 0.7

+ # === TOOLS ===

+ def get_real_time():
+     try:
+         ist = pytz.timezone('Asia/Kolkata')
+         return datetime.now(ist).strftime("%A, %d %B %Y, %I:%M %p (IST)")
+     except Exception:
+         return str(datetime.now())

+ def search_sync(query: str):
+     logger.info(f"🔎 Executing Search for: {query}")
      try:
+         with DDGS() as ddgs:
+             results = list(ddgs.text(query, max_results=4))
+             if not results: return None
+             formatted_res = ""
+             for r in results:
+                 formatted_res += f"Source: {r['title']}\nSnippet: {r['body']}\nLink: {r['href']}\n\n"
+             return formatted_res
      except Exception as e:
+         logger.error(f"DDGS Error: {e}")
          return None

+ async def perform_web_search(query: str):
+     if not NEURAL_AVAILABLE: return None
+     return await asyncio.to_thread(search_sync, query)
+
+ # === ADVANCED INTENT LOGIC (2025 Technique) ===
+
+ def analyze_deep_intent(text: str):
+     """
+     Combines Neural Similarity with Confidence Gating & Token Chain Analysis.
+     Returns: (intent_name, confidence_score)
+     """
+     # 1. Low-Level Token Analysis (The Chain Reaction Check)
+     text_lower = text.lower()
+     tokens = text_lower.split()
+
+     # GUARDRAIL: Self-Reference Override
+     # If user asks about "your name", "you", "yourself" -> Force Chat/Identity
+     self_tokens = {"your", "you", "yourself", "created", "made"}
+     if "name" in tokens and len(tokens) < 7 and any(t in tokens for t in self_tokens):
+         return "identity", 0.99
+
+     if not neural_classifier: return "chat", 0.0
+
      try:
+         # 2. Neural Vector Search
+         user_embedding = neural_classifier.encode(text, convert_to_tensor=True)
+         scores = {}
+         for intent, anchor_embeddings in encoded_anchors.items():
+             cosine_scores = util.cos_sim(user_embedding, anchor_embeddings)
+             # We take the MAX score from the anchor group
+             scores[intent] = float(cosine_scores.max())
+
+         best_intent = max(scores, key=scores.get)
+         best_score = scores[best_intent]
+
+         # 3. Confidence Gating
+         # Search needs HIGH confidence to trigger (prevent false positives)
+         if best_intent == "search":
+             if best_score < 0.45:  # Strict threshold for search
+                 logger.info(f"⚠️ Search detected but low confidence ({best_score:.2f}). Fallback to chat.")
+                 return "chat", best_score
+
+         # Identity maps to chat logic internally
+         if best_intent == "identity":
+             return "chat", best_score
+
+         # General Threshold
+         if best_score < 0.30:
+             return "chat", best_score
+
+         return best_intent, best_score
+
      except Exception as e:
+         logger.error(f"Intent Error: {e}")
+         return "chat", 0.0

+ def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
+     if isinstance(chunk, dict):
+         return {k: v for k, v in chunk.items() if k != "status"}
+     return {"text": str(chunk)}

  SYSTEM_PREFIX = (
      "You are Nexari G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
+     "Your core directive is to be helpful and accurate. "
+     "Respond naturally and clearly, and always keep a friendly, positive tone, regardless of context. "
+     "You have internet access, web search ability, and server tools. Never deny these capabilities. "
+
+     # --- NEW FOCUS INSTRUCTION START ---
      "ATTENTION PROTOCOL: "
+     "1. Always prioritize the user’s latest message above everything else. "
+     "2. Use previous messages only for context (names, continuity, or details). "
+     "3. If the user changes the topic, immediately switch and ignore the old topic. "
+     "4. Do not repeat previous answers unless the user clearly asks for repetition. "
+     # --- NEW FOCUS INSTRUCTION END ---

+     "Use emojis to make the conversation lively. "
+ )

+ def limit_context(messages: List[Dict]) -> List[Dict]:
+     if not messages: return []
+     sys = messages[0] if messages[0].get("role") == "system" else None
+     start = 1 if sys else 0
+     rem = messages[start:]
+     if len(rem) > MAX_HISTORY_MESSAGES: rem = rem[-MAX_HISTORY_MESSAGES:]
+     final = []
+     if sys: final.append(sys)
+     final.extend(rem)
+     return final

  @app.post("/v1/chat/completions")
  async def chat_endpoint(request: ChatRequest):
      raw_msgs = [m.dict() for m in request.messages] if request.messages else []
+     if not raw_msgs: return {"error": "Empty messages"}
+
+     last_msg_text = raw_msgs[-1]['content']
+
+     # === ANALYZE INTENT ===
+     intent, confidence = analyze_deep_intent(last_msg_text)
+
+     logger.info(f"🧠 Analysis: Text='{last_msg_text}' | Intent='{intent}' | Conf={confidence:.2f}")

      selected_model = chat_model.model
      sys_msg = SYSTEM_PREFIX
      status = "Thinking..."
      injected_context = ""
+
+     # === ROUTING ===
+
      if intent == "coding" and getattr(coder_model, "model", None):
          selected_model = coder_model.model
          sys_msg += " You are an Expert Coder. Provide clean, working code."
          status = "Coding..."
+
+     elif intent == "reasoning" and getattr(chat_model, "model", None):
+         selected_model = chat_model.model
+         sys_msg += " Think step-by-step."
          status = "Reasoning..."
+
      elif intent == "time":
+         t = get_real_time()
+         injected_context = f"CURRENT DATE & TIME: {t}"
          status = "Checking Time..."
+
      elif intent == "search":
          status = "Searching Web..."
+         clean_query = last_msg_text.replace("search", "").replace("google", "").strip()
+         search_q = clean_query if len(clean_query) > 2 else last_msg_text
+
+         res = await perform_web_search(search_q)
+
          if res:
+             injected_context = (
+                 f"### SEARCH RESULTS (REAL-TIME DATA):\n{res}\n"
+                 "### INSTRUCTION:\n"
+                 "Answer the user's question using ONLY the Search Results above."
+             )
          else:
+             # Silent fallback if search fails
              injected_context = ""
              status = "Thinking..."

+     # === CONSTRUCT MESSAGE ===
      if raw_msgs[0].get("role") != "system":
          raw_msgs.insert(0, {"role":"system","content": sys_msg})
      else:
          raw_msgs[0]["content"] = sys_msg

      if injected_context:
+         new_content = (
+             f"{injected_context}\n\n"
+             f"### USER QUESTION:\n{last_msg_text}"
+         )
+         raw_msgs[-1]['content'] = new_content

+     if not selected_model:
+         if chat_model.model: selected_model = chat_model.model
+         elif coder_model.model: selected_model = coder_model.model
+         else: return {"error": "System warming up..."}

+     final_msgs = limit_context(raw_msgs)
+
+     def iter_response():
          try:
              yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
              yield ":\n\n"
+             stream = selected_model.create_chat_completion(
+                 messages=final_msgs, temperature=request.temperature, stream=True
+             )
              for chunk in stream:
+                 yield f"data: {json.dumps(sanitize_chunk(chunk))}\n\n"
              yield "data: [DONE]\n\n"
          except Exception as e:
+             logger.error(f"Stream error: {e}")
              yield f"data: {json.dumps({'error': str(e)})}\n\n"
+
+     return StreamingResponse(iter_response(), media_type="text/event-stream")
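The streaming contract of the rewritten endpoint matches the fallback path of the old code: one "event: status" frame, a ":" comment line as a keep-alive, then "data:" chunks filtered through sanitize_chunk, terminated by "data: [DONE]". A minimal client sketch follows; it assumes the server listens on http://localhost:8000 and that the requests package is installed (both assumptions, not part of the commit).

# Minimal SSE consumer for POST /v1/chat/completions (sketch; the base URL
# below is an assumption, not part of the commit).
import json
import requests

payload = {
    "messages": [{"role": "user", "content": "what time is it"}],
    "stream": True,
    "temperature": 0.7,
}

with requests.post("http://localhost:8000/v1/chat/completions", json=payload, stream=True) as resp:
    for raw in resp.iter_lines(decode_unicode=True):
        if not raw or raw.startswith(":"):      # skip blanks and keep-alive comments
            continue
        if raw.startswith("event:"):            # e.g. "event: status"
            print("[event]", raw.split(":", 1)[1].strip())
            continue
        if raw.startswith("data:"):
            data = raw.split(":", 1)[1].strip()
            if data == "[DONE]":
                break
            print(json.loads(data))             # dict produced by sanitize_chunk()

Any client that can iterate response lines works the same way; the only contract is the "event:"/"data:" prefixes and the "[DONE]" terminator.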