Nexari-Research committed on
Commit 941017d · verified · 1 Parent(s): 0c5b849

Update app.py

Files changed (1):
  app.py +202 -443
app.py CHANGED
@@ -1,27 +1,27 @@
- # app.py — Nexari G1 (Tool-aware + Two-pass protocol + Confidence + Self-learning)
- # Based on your codebase; preserves existing instructions and behavior.
-
  import os
  import json
  import logging
  import asyncio
  from datetime import datetime
- import pytz
- from fastapi import FastAPI, Request
- from fastapi.responses import StreamingResponse, JSONResponse
  from pydantic import BaseModel
- from typing import Any, Dict, List, Optional

  import coder_model
  import chat_model

- # Optional libs
  try:
      from sentence_transformers import SentenceTransformer, util
      from duckduckgo_search import DDGS
      NEURAL_AVAILABLE = True
- except Exception:
      NEURAL_AVAILABLE = False

  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger("nexari.app")
@@ -30,513 +30,272 @@ app = FastAPI()

  MODEL_DIR = "./models"
  NEURAL_DIR = os.path.join(MODEL_DIR, "neural")
- MEMORY_PATH = os.path.join(MODEL_DIR, "decision_memory.jsonl")

  NEURAL_MODEL_NAME = "all-MiniLM-L6-v2"
- neural_model = None
- anchor_vectors: Dict[str, Any] = {}
-
- MAX_HISTORY_MESSAGES = 6

  INTENT_ANCHORS = {
-     "coding": [
-         "write code", "fix bug", "error in code", "update file",
-         "full code", "optimize function", "refactor", "api backend"
-     ],
-     "reasoning": [
-         "explain", "why", "how does", "step by step",
-         "deep analysis", "logic behind", "compare"
-     ],
-     "search": [
-         "latest", "current", "today update",
-         "who is now", "recent news", "real time"
-     ],
-     "time": ["current time", "date today"],
-     "identity": ["who are you", "your name", "who created you"]
  }

- COMMAND_WORDS = {"give", "write", "fix", "update", "create", "build"}
- LEARNING_WORDS = {"explain", "why", "how", "meaning", "reason"}
- INFO_WORDS = {"latest", "current", "today", "now", "price", "news"}

- memory_lock = asyncio.Lock()

- # ---------------- Startup ----------------

- def ensure_dirs():
-     os.makedirs(MODEL_DIR, exist_ok=True)
-     os.makedirs(NEURAL_DIR, exist_ok=True)

- def load_neural():
-     global neural_model, anchor_vectors
-     if not NEURAL_AVAILABLE:
-         logger.warning("Neural libs not available.")
-         return
-     model = SentenceTransformer(NEURAL_MODEL_NAME, cache_folder=NEURAL_DIR, device="cpu")
-     neural_model = model
-     anchor_vectors.clear()
-     for k, texts in INTENT_ANCHORS.items():
-         anchor_vectors[k] = model.encode(texts, convert_to_tensor=True)
-     logger.info("🧠 Neural decision engine loaded")

  @app.on_event("startup")
- async def startup():
-     ensure_dirs()
      coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
      chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
-     await asyncio.gather(
-         coder_model.load_model_async(),
-         chat_model.load_model_async(),
-         asyncio.to_thread(load_neural),
-         return_exceptions=True
-     )
-     if not os.path.exists(MEMORY_PATH):
-         open(MEMORY_PATH, "a").close()
-     logger.info("🚀 Nexari G1 fully online")
-
- # ---------------- Schemas ----------------

  class Message(BaseModel):
      role: str
      content: str

  class ChatRequest(BaseModel):
-     messages: List[Message]
      stream: bool = True
      temperature: float = 0.7

- class FeedbackPayload(BaseModel):
-     text: str
-     correct_intent: str
-     correct_behavior: Optional[str] = None
-
- # ---------------- Tools ----------------

- def get_time():
-     ist = pytz.timezone("Asia/Kolkata")
-     return datetime.now(ist).strftime("%A, %d %B %Y, %I:%M %p IST")

- def search_sync(q: str):
      try:
-         with DDGS() as d:
-             res = list(d.text(q, max_results=4))
-             return "\n".join(
-                 f"{r['title']}: {r['body']} ({r['href']})" for r in res
-             ) if res else None
      except Exception as e:
-         logger.error(f"Search error: {e}")
          return None

- async def web_search(q: str):
-     return await asyncio.to_thread(search_sync, q)
-
- # ---------------- Memory helpers (unchanged) ----------------
-
- async def append_memory(entry: Dict[str, Any]):
-     async with memory_lock:
-         with open(MEMORY_PATH, "a", encoding="utf-8") as f:
-             f.write(json.dumps(entry, ensure_ascii=False) + "\n")
-
- async def load_memory(limit: Optional[int] = None) -> List[Dict[str, Any]]:
-     items = []
-     async with memory_lock:
-         if not os.path.exists(MEMORY_PATH):
-             return []
-         with open(MEMORY_PATH, "r", encoding="utf-8") as f:
-             for i, line in enumerate(f):
-                 if limit and i >= limit:
-                     break
-                 line = line.strip()
-                 if not line:
-                     continue
-                 try:
-                     items.append(json.loads(line))
-                 except Exception:
-                     continue
-     return items
-
- def embedding_from_list(lst):
-     try:
-         import torch
-         return torch.tensor(lst)
-     except Exception:
-         import numpy as np
-         return np.array(lst)
-
- # ---------------- Behavior & Decision Engine (as before) ----------------
-
- def detect_behavior(text: str) -> str:
-     tokens = set(text.lower().split())
-     if tokens & COMMAND_WORDS:
-         return "command"
-     if tokens & LEARNING_WORDS:
-         return "learning"
-     if tokens & INFO_WORDS:
-         return "info"
-     return "neutral"
-
- def memory_vote_adjustment(current_emb, prediction_intent, prediction_behavior, base_conf, top_k = 6):
-     if not NEURAL_AVAILABLE:
-         return None, base_conf
-     mem = asyncio.get_event_loop().run_until_complete(load_memory(limit=1000))
-     if not mem:
-         return None, base_conf
-     sims = []
-     for item in mem:
-         try:
-             stored_emb = embedding_from_list(item.get("embedding", []))
-             score = float(util.cos_sim(current_emb, stored_emb).max()) if hasattr(util, "cos_sim") else 0.0
-             sims.append((score, item))
-         except Exception:
-             continue
-     if not sims:
-         return None, base_conf
-     sims.sort(key=lambda x: x[0], reverse=True)
-     top = sims[:top_k]
-     agree = 0
-     disagree = 0
-     intent_votes = {}
-     for score, item in top:
-         item_intent = item.get("intent")
-         if item_intent == prediction_intent:
-             agree += score
-         else:
-             disagree += score
-         intent_votes.setdefault(item_intent, 0.0)
-         intent_votes[item_intent] += score
-     total = agree + disagree + 1e-9
-     support_ratio = agree / total
-     adjusted_conf = base_conf
-     override = None
-     best_vote_intent = max(intent_votes.items(), key=lambda x: x[1])[0]
-     best_vote_score = intent_votes[best_vote_intent] / (sum(intent_votes.values()) + 1e-9)
-     if support_ratio > 0.6:
-         adjusted_conf = min(1.0, base_conf + 0.12)
-     elif support_ratio < 0.4 and best_vote_intent != prediction_intent and best_vote_score > 0.55:
-         override = best_vote_intent
-         adjusted_conf = max(0.25, base_conf * 0.5)
-     else:
-         adjusted_conf = max(0.02, base_conf * (0.9 + support_ratio * 0.1))
-     return override, adjusted_conf
-
- def analyze_decision(text: str, history: List[str]):
-     tokens = text.lower().split()
-     if "time" in tokens or "date" in tokens:
-         return "time", "neutral", 1.0, {"reason": "hard_override_time"}
-     if "who" in tokens and "you" in tokens:
-         return "identity", "neutral", 1.0, {"reason": "hard_override_identity"}
-     if not NEURAL_AVAILABLE or not neural_model:
-         behavior = detect_behavior(text)
-         return "chat", behavior, 0.05, {"reason": "no_neural_available"}
-     emb = neural_model.encode(text, convert_to_tensor=True)
-     intent_scores = {}
-     for k, v in anchor_vectors.items():
-         try:
-             s = float(util.cos_sim(emb, v).max())
-             intent_scores[k] = s
-         except Exception:
-             intent_scores[k] = 0.0
-     best_intent = max(intent_scores.items(), key=lambda x: x[1])[0]
-     best_score = intent_scores[best_intent]
-     behavior = detect_behavior(text)
-     flow_bonus = 0.0
-     if history:
-         recent = " ".join(history[-2:]).lower()
-         if best_intent == "coding" and any(w in recent for w in COMMAND_WORDS):
-             flow_bonus = 0.12
-         if best_intent == "reasoning" and any(w in recent for w in LEARNING_WORDS):
-             flow_bonus = 0.12
-     base_conf = min(1.0, best_score + flow_bonus)
-     if best_intent == "search" and behavior != "info" and base_conf < 0.50:
-         base_conf = base_conf * 0.6
-         best_intent = "chat"
      try:
-         override_intent, adjusted_conf = memory_vote_adjustment(emb, best_intent, behavior, base_conf)
-         if override_intent:
-             metadata = {
-                 "base_intent": best_intent,
-                 "override_by_memory": override_intent,
-                 "base_conf": base_conf
-             }
-             best_intent = override_intent
-             final_conf = adjusted_conf
-         else:
-             metadata = {"base_intent": best_intent, "base_conf": base_conf, "memory_adjust": False}
-             final_conf = adjusted_conf
      except Exception as e:
-         logger.exception("Memory adjust failed: %s", e)
-         metadata = {"base_intent": best_intent, "base_conf": base_conf, "memory_adjust": "error"}
-         final_conf = base_conf
-     final_conf = max(0.0, min(1.0, final_conf))
-     return best_intent, behavior, final_conf, {**metadata, "intent_scores": intent_scores, "embedding_available": True}

- # ---------------- System prompt + Tool manifest ----------------

  SYSTEM_PREFIX = (
      "You are Nexari G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
-     "Be accurate, friendly, and helpful. "
-     "You have access to server tools which provide real-time data. DO NOT claim direct internet access. "
-     "TOOLS AVAILABLE (server-side):\n"
-     " - get_time(): returns the current server time in IST.\n"
-     " - web_search(query): returns real-time search results (title, snippet, link).\n\n"
-     "USAGE PROTOCOL (CRITICAL):\n"
-     " - If you need the current time, include exactly this token anywhere in your final response: __CALL_GET_TIME__\n"
-     " - If you need a web search, include exactly this token followed by the query on the same line: __CALL_WEBSEARCH__: <your query here>\n"
-     " - Do NOT fabricate the results of searches. If SEARCH results are injected, use ONLY those results when answering.\n"
-     " - If you can answer without calling tools, answer directly. If not sure, you may request a tool using the tokens above.\n\n"
      "ATTENTION PROTOCOL: "
-     "1. Prioritize latest message. "
-     "2. Use history only for context. "
-     "3. Topic change = instant switch. "
-     "4. No repetition unless asked. "
-     "Use emojis where appropriate."
- )
-
- def trim_context(msgs: List[Dict]):
-     sys = msgs[0] if msgs and msgs[0]["role"] == "system" else None
-     body = msgs[1:] if sys else msgs
-     body = body[-MAX_HISTORY_MESSAGES:]
-     return ([sys] if sys else []) + body
-
- def sanitize(chunk: Any):
-     if isinstance(chunk, dict):
-         chunk.pop("status", None)
-         return chunk
-     return {"text": str(chunk)}
-
- # ---------------- Helper: normalize model response to text ----------------

- def extract_text_from_model_response(resp):
-     # robust extraction: handle dict, list, generator-like, or plain string
-     try:
-         # if llama-cpp returns a dict with 'choices' or 'content'
-         if isinstance(resp, dict):
-             # try standard keys
-             if "choices" in resp and isinstance(resp["choices"], list) and resp["choices"]:
-                 ch = resp["choices"][0]
-                 if isinstance(ch, dict) and ("text" in ch or "message" in ch):
-                     return ch.get("text") or (ch.get("message") and ch["message"].get("content")) or str(resp)
-             if "text" in resp:
-                 return resp["text"]
-             if "content" in resp:
-                 return resp["content"]
-             return str(resp)
-         if isinstance(resp, (list, tuple)):
-             return " ".join(map(str, resp))
-         # fallback
-         return str(resp)
-     except Exception:
-         return str(resp)

- # ---------------- Chat endpoint (two-pass tool-aware) ----------------

  @app.post("/v1/chat/completions")
  async def chat_endpoint(request: ChatRequest):
      raw_msgs = [m.dict() for m in request.messages] if request.messages else []
-     if not raw_msgs:
-         return JSONResponse({"error": "Empty messages"}, status_code=400)
-
-     user_text = raw_msgs[-1]["content"]
-     history_texts = [m["content"] for m in raw_msgs[:-1] if m["role"] == "user"]
-
-     intent, behavior, confidence, meta = analyze_decision(user_text, history_texts)
-     logger.info(f"Decision: intent={intent} behavior={behavior} confidence={confidence:.3f} meta_keys={list(meta.keys())}")

      selected_model = chat_model.model
      sys_msg = SYSTEM_PREFIX
      status = "Thinking..."
      injected_context = ""
-     # If model choice via decision
      if intent == "coding" and getattr(coder_model, "model", None):
          selected_model = coder_model.model
          sys_msg += " You are an Expert Coder. Provide clean, working code."
          status = "Coding..."
-     elif intent == "reasoning":
          status = "Reasoning..."
      elif intent == "time":
-         # direct injection of time; no need to call model tools
-         injected_context = f"CURRENT DATE & TIME: {get_time()}"
          status = "Checking Time..."
      elif intent == "search":
          status = "Searching Web..."
-         # perform server search proactively when intent==search
-         res = await web_search(user_text)
          if res:
-             injected_context = f"### SEARCH RESULTS (REAL-TIME DATA):\n{res}\n"
          else:
              injected_context = ""
              status = "Thinking..."

-     # Ensure system prefix is present
      if raw_msgs[0].get("role") != "system":
          raw_msgs.insert(0, {"role":"system","content": sys_msg})
      else:
          raw_msgs[0]["content"] = sys_msg

-     # If we proactively have injected_context (search results or time), attach them
      if injected_context:
-         raw_msgs[-1]['content'] = f"{injected_context}\n\nUSER QUESTION:\n{user_text}"

-     final_msgs = trim_context(raw_msgs)

-     # first pass: run model in non-stream mode to see if model requests a tool
-     try:
-         first_resp = selected_model.create_chat_completion(messages=final_msgs, temperature=request.temperature, stream=False)
-         first_text = extract_text_from_model_response(first_resp)
-     except Exception as e:
-         logger.exception("First-pass model call failed: %s", e)
-         # fallback to streaming directly to avoid blocking
-         first_text = ""
-
-     # Check for tool-request sentinels in first_text
-     tool_requested = None
-     tool_query = None
-     if "__CALL_GET_TIME__" in first_text:
-         tool_requested = "get_time"
-     elif "__CALL_WEBSEARCH__:" in first_text:
-         # extract query after sentinel
-         try:
-             parts = first_text.split("__CALL_WEBSEARCH__:")
-             if len(parts) >= 2:
-                 tool_query = parts[1].strip().splitlines()[0].strip()
-                 if tool_query:
-                     tool_requested = "web_search"
-         except Exception:
-             tool_requested = None
-
-     # If model requested a tool, perform it and re-run model in streaming mode with injected results
-     if tool_requested == "get_time":
-         result = get_time()
-         injected = f"### TOOL_RESULT: GET_TIME\n{result}\n\n---\nNow answer the user's question using the TOOL_RESULT above."
-         final_msgs[-1]['content'] = f"{injected}\n\nUSER QUESTION:\n{user_text}"
-         status = "Using get_time..."
-         # stream final output
-         def stream_with_tool():
-             try:
-                 # send decision metadata event
-                 decision_payload = {
-                     "intent": intent,
-                     "behavior": behavior,
-                     "confidence": round(confidence, 4),
-                     "tools_available": True,
-                     "tool_requested": "get_time",
-                     "meta": meta
-                 }
-                 yield f"event: decision\ndata: {json.dumps(decision_payload)}\n\n"
-                 yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
-                 yield ":\n\n"
-                 stream = selected_model.create_chat_completion(messages=final_msgs, temperature=request.temperature, stream=True)
-                 for chunk in stream:
-                     yield f"data: {json.dumps(sanitize(chunk))}\n\n"
-                 yield "data: [DONE]\n\n"
-             except Exception as e:
-                 logger.exception("Stream with tool error: %s", e)
-                 yield f"data: {json.dumps({'error': str(e)})}\n\n"
-         return StreamingResponse(stream_with_tool(), media_type="text/event-stream")
-
-     if tool_requested == "web_search" and tool_query:
-         # do the search
-         res = await web_search(tool_query)
-         injected = f"### TOOL_RESULT: WEB_SEARCH for query: {tool_query}\n{res or 'NO_RESULTS'}\n\n---\nNow answer the user's question using the TOOL_RESULT above."
-         final_msgs[-1]['content'] = f"{injected}\n\nUSER QUESTION:\n{user_text}"
-         status = "Using web_search..."
-         def stream_with_tool():
-             try:
-                 decision_payload = {
-                     "intent": intent,
-                     "behavior": behavior,
-                     "confidence": round(confidence, 4),
-                     "tools_available": True,
-                     "tool_requested": "web_search",
-                     "tool_query": tool_query,
-                     "meta": meta
-                 }
-                 yield f"event: decision\ndata: {json.dumps(decision_payload)}\n\n"
-                 yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
-                 yield ":\n\n"
-                 stream = selected_model.create_chat_completion(messages=final_msgs, temperature=request.temperature, stream=True)
-                 for chunk in stream:
-                     yield f"data: {json.dumps(sanitize(chunk))}\n\n"
-                 yield "data: [DONE]\n\n"
-             except Exception as e:
-                 logger.exception("Stream with web_search error: %s", e)
-                 yield f"data: {json.dumps({'error': str(e)})}\n\n"
-         return StreamingResponse(stream_with_tool(), media_type="text/event-stream")
-
-     # If no tool requested or first pass returned a complete answer, stream the first_text or stream normal model output
-     # If model returned a non-empty phrase in first_text and user wanted streaming false, send that as single data chunk
-     if first_text and not (tool_requested):
-         def stream_simple():
-             try:
-                 decision_payload = {
-                     "intent": intent,
-                     "behavior": behavior,
-                     "confidence": round(confidence, 4),
-                     "tools_available": NEURAL_AVAILABLE,
-                     "meta": meta
-                 }
-                 yield f"event: decision\ndata: {json.dumps(decision_payload)}\n\n"
-                 yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
-                 yield ":\n\n"
-                 # send the precomputed text as one data chunk
-                 yield f"data: {json.dumps({'text': first_text})}\n\n"
-                 yield "data: [DONE]\n\n"
-             except Exception as e:
-                 logger.exception("stream_simple error: %s", e)
-                 yield f"data: {json.dumps({'error': str(e)})}\n\n"
-         return StreamingResponse(stream_simple(), media_type="text/event-stream")
-
-     # fallback streaming (if first_text empty or model prefers streaming)
-     def stream_fallback():
          try:
-             decision_payload = {
-                 "intent": intent,
-                 "behavior": behavior,
-                 "confidence": round(confidence, 4),
-                 "tools_available": NEURAL_AVAILABLE,
-                 "meta": meta
-             }
-             yield f"event: decision\ndata: {json.dumps(decision_payload)}\n\n"
              yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
              yield ":\n\n"
-             stream = selected_model.create_chat_completion(messages=final_msgs, temperature=request.temperature, stream=True)
              for chunk in stream:
-                 yield f"data: {json.dumps(sanitize(chunk))}\n\n"
              yield "data: [DONE]\n\n"
          except Exception as e:
-             logger.exception("stream_fallback error: %s", e)
              yield f"data: {json.dumps({'error': str(e)})}\n\n"

-     return StreamingResponse(stream_fallback(), media_type="text/event-stream")
-
- # ---------------- Feedback endpoint ----------------
-
- @app.post("/v1/feedback")
- async def feedback_handler(payload: FeedbackPayload):
-     text = payload.text.strip()
-     intent = payload.correct_intent
-     behavior = payload.correct_behavior or detect_behavior(text)
-     if not text or not intent:
-         return JSONResponse({"error": "text and correct_intent required"}, status_code=400)
-     if NEURAL_AVAILABLE and neural_model:
-         try:
-             emb = neural_model.encode(text, convert_to_tensor=False)
-             emb_list = emb.tolist() if hasattr(emb, "tolist") else list(map(float, emb))
-         except Exception:
-             emb_list = []
-     else:
-         emb_list = []
-     entry = {
-         "text": text,
-         "intent": intent,
-         "behavior": behavior,
-         "embedding": emb_list,
-         "ts": datetime.utcnow().isoformat() + "Z"
-     }
-     await append_memory(entry)
-     logger.info("Feedback saved to memory: intent=%s behavior=%s text=%s", intent, behavior, text[:80])
-     return {"status": "ok", "saved": True}
-
- @app.get("/v1/memory/size")
- async def memory_size():
-     mem = await load_memory()
-     return {"memory_entries": len(mem)}
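The removed implementation above centered on a two-pass tool protocol: a first, non-streamed completion was scanned for sentinel tokens (__CALL_GET_TIME__ and __CALL_WEBSEARCH__: <query>), the requested tool ran server-side, and a second, streamed completion answered with the tool result injected. For reference, that sentinel parsing can be distilled into a few lines; the sketch below is standalone, and the example model outputs fed to it are hypothetical, not taken from the commit.

# Standalone distillation of the removed sentinel parsing (sketch; the
# example strings below are hypothetical first-pass completions).

def parse_tool_request(first_text: str):
    """Return (tool_name, query) requested by a first-pass completion, if any."""
    if "__CALL_GET_TIME__" in first_text:
        return "get_time", None
    if "__CALL_WEBSEARCH__:" in first_text:
        # The query is whatever follows the sentinel on the same line.
        tail = first_text.split("__CALL_WEBSEARCH__:", 1)[1]
        query = tail.strip().splitlines()[0].strip()
        if query:
            return "web_search", query
    return None, None

assert parse_tool_request("Sure! __CALL_GET_TIME__") == ("get_time", None)
assert parse_tool_request("__CALL_WEBSEARCH__: gold price today\nThanks!") == ("web_search", "gold price today")
assert parse_tool_request("No tool needed.") == (None, None)

The rewritten file below drops this protocol entirely and instead decides on the server, before the model is called, using anchor-embedding similarity with confidence gating.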
+ # app.py — Nexari G1 (Advanced Intent Analysis & Confidence Gating)

  import os
  import json
  import logging
  import asyncio
  from datetime import datetime
+ import pytz
+ from fastapi import FastAPI
+ from fastapi.responses import StreamingResponse
  from pydantic import BaseModel
+ from typing import Any, Dict, List

+ # Local model modules
  import coder_model
  import chat_model

+ # === SAFE IMPORT FOR NEW LIBRARIES ===
  try:
      from sentence_transformers import SentenceTransformer, util
      from duckduckgo_search import DDGS
      NEURAL_AVAILABLE = True
+ except ImportError:
      NEURAL_AVAILABLE = False
+     print("⚠️ WARNING: sentence-transformers or duckduckgo-search not found.")

  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger("nexari.app")

  MODEL_DIR = "./models"
  NEURAL_DIR = os.path.join(MODEL_DIR, "neural")

+ # === CONFIGURATION ===
  NEURAL_MODEL_NAME = "all-MiniLM-L6-v2"
+ neural_classifier = None
+ encoded_anchors = {}
+ MAX_HISTORY_MESSAGES = 6

+ # Optimized Anchors for better Vector Separation
  INTENT_ANCHORS = {
+     "coding": ["write python code", "fix bug", "create function", "script", "debug", "sql query", "html css", "java code"],
+     "reasoning": ["solve math", "explain logic", "why", "prove that", "analyze", "physics", "chemistry"],
+     "search": ["latest news", "price of gold", "weather today", "who is the ceo", "current stock price", "search google", "find info"],
+     "time": ["what time is it", "current time", "date today", "clock", "day is today"],
+     # New category to pull "Identity" questions away from Search
+     "identity": ["what is your name", "who are you", "who created you", "tell me about yourself", "are you ai"]
  }

+ def ensure_model_dir_or_fail():
+     try:
+         os.makedirs(MODEL_DIR, exist_ok=True)
+         os.makedirs(NEURAL_DIR, exist_ok=True)
+     except Exception as e:
+         logger.critical("Unable to create model dir: %s", e)

+ # === LOADERS ===

+ def load_neural_network():
+     global neural_classifier, encoded_anchors
+     if not NEURAL_AVAILABLE: return

+     try:
+         logger.info("⏳ Loading Neural Intent Model...")
+         model = SentenceTransformer(NEURAL_MODEL_NAME, cache_folder=NEURAL_DIR, device="cpu")
+
+         anchors = {}
+         for intent, texts in INTENT_ANCHORS.items():
+             anchors[intent] = model.encode(texts, convert_to_tensor=True)
+
+         neural_classifier = model
+         encoded_anchors = anchors
+         logger.info("✅ Neural Intent Classifier Ready!")
+     except Exception as e:
+         logger.error(f"❌ Failed to load Neural Network: {e}")

+ async def load_neural_async():
+     await asyncio.to_thread(load_neural_network)

  @app.on_event("startup")
+ async def startup_event():
+     ensure_model_dir_or_fail()
      coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
      chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
+
+     tasks = [
+         asyncio.create_task(coder_model.load_model_async()),
+         asyncio.create_task(chat_model.load_model_async()),
+         asyncio.create_task(load_neural_async()),
+     ]
+     asyncio.gather(*tasks, return_exceptions=True)
+     logger.info("🚀 Server Startup Complete")

  class Message(BaseModel):
      role: str
      content: str

  class ChatRequest(BaseModel):
+     messages: list[Message]
      stream: bool = True
      temperature: float = 0.7

+ # === TOOLS ===

+ def get_real_time():
+     try:
+         ist = pytz.timezone('Asia/Kolkata')
+         return datetime.now(ist).strftime("%A, %d %B %Y, %I:%M %p (IST)")
+     except Exception:
+         return str(datetime.now())

+ def search_sync(query: str):
+     logger.info(f"🔎 Executing Search for: {query}")
      try:
+         with DDGS() as ddgs:
+             results = list(ddgs.text(query, max_results=4))
+             if not results: return None
+             formatted_res = ""
+             for r in results:
+                 formatted_res += f"Source: {r['title']}\nSnippet: {r['body']}\nLink: {r['href']}\n\n"
+             return formatted_res
      except Exception as e:
+         logger.error(f"DDGS Error: {e}")
          return None

+ async def perform_web_search(query: str):
+     if not NEURAL_AVAILABLE: return None
+     return await asyncio.to_thread(search_sync, query)
+
+ # === ADVANCED INTENT LOGIC (2025 Technique) ===
+
+ def analyze_deep_intent(text: str):
+     """
+     Combines Neural Similarity with Confidence Gating & Token Chain Analysis.
+     Returns: (intent_name, confidence_score)
+     """
+     # 1. Low-Level Token Analysis (The Chain Reaction Check)
+     text_lower = text.lower()
+     tokens = text_lower.split()
+
+     # GUARDRAIL: Self-Reference Override
+     # If user asks about "your name", "you", "yourself" -> Force Chat/Identity
+     self_tokens = {"your", "you", "yourself", "created", "made"}
+     if "name" in tokens and len(tokens) < 7 and any(t in tokens for t in self_tokens):
+         return "identity", 0.99
+
+     if not neural_classifier: return "chat", 0.0
+
      try:
+         # 2. Neural Vector Search
+         user_embedding = neural_classifier.encode(text, convert_to_tensor=True)
+         scores = {}
+         for intent, anchor_embeddings in encoded_anchors.items():
+             cosine_scores = util.cos_sim(user_embedding, anchor_embeddings)
+             # We take the MAX score from the anchor group
+             scores[intent] = float(cosine_scores.max())
+
+         best_intent = max(scores, key=scores.get)
+         best_score = scores[best_intent]
+
+         # 3. Confidence Gating
+         # Search needs HIGH confidence to trigger (prevent false positives)
+         if best_intent == "search":
+             if best_score < 0.45:  # Strict threshold for search
+                 logger.info(f"⚠️ Search detected but low confidence ({best_score:.2f}). Fallback to chat.")
+                 return "chat", best_score
+
+         # Identity maps to chat logic internally
+         if best_intent == "identity":
+             return "chat", best_score
+
+         # General Threshold
+         if best_score < 0.30:
+             return "chat", best_score
+
+         return best_intent, best_score
+
      except Exception as e:
+         logger.error(f"Intent Error: {e}")
+         return "chat", 0.0

+ def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
+     if isinstance(chunk, dict):
+         return {k: v for k, v in chunk.items() if k != "status"}
+     return {"text": str(chunk)}

  SYSTEM_PREFIX = (
      "You are Nexari G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
+     "Your core directive is to be helpful and accurate. "
+     "Respond naturally and clearly, and always keep a friendly, positive tone, regardless of context. "
+     "You have internet access, web search ability, and server tools. Never deny these capabilities. "
+
+     # --- NEW FOCUS INSTRUCTION START ---
      "ATTENTION PROTOCOL: "
+     "1. Always prioritize the user’s latest message above everything else. "
+     "2. Use previous messages only for context (names, continuity, or details). "
+     "3. If the user changes the topic, immediately switch and ignore the old topic. "
+     "4. Do not repeat previous answers unless the user clearly asks for repetition. "
+     # --- NEW FOCUS INSTRUCTION END ---

+     "Use emojis to make the conversation lively. "
+ )

+ def limit_context(messages: List[Dict]) -> List[Dict]:
+     if not messages: return []
+     sys = messages[0] if messages[0].get("role") == "system" else None
+     start = 1 if sys else 0
+     rem = messages[start:]
+     if len(rem) > MAX_HISTORY_MESSAGES: rem = rem[-MAX_HISTORY_MESSAGES:]
+     final = []
+     if sys: final.append(sys)
+     final.extend(rem)
+     return final

  @app.post("/v1/chat/completions")
  async def chat_endpoint(request: ChatRequest):
      raw_msgs = [m.dict() for m in request.messages] if request.messages else []
+     if not raw_msgs: return {"error": "Empty messages"}
+
+     last_msg_text = raw_msgs[-1]['content']
+
+     # === ANALYZE INTENT ===
+     intent, confidence = analyze_deep_intent(last_msg_text)
+
+     logger.info(f"🧠 Analysis: Text='{last_msg_text}' | Intent='{intent}' | Conf={confidence:.2f}")

      selected_model = chat_model.model
      sys_msg = SYSTEM_PREFIX
      status = "Thinking..."
      injected_context = ""
+
+     # === ROUTING ===
+
      if intent == "coding" and getattr(coder_model, "model", None):
          selected_model = coder_model.model
          sys_msg += " You are an Expert Coder. Provide clean, working code."
          status = "Coding..."
+
+     elif intent == "reasoning" and getattr(chat_model, "model", None):
+         selected_model = chat_model.model
+         sys_msg += " Think step-by-step."
          status = "Reasoning..."
+
      elif intent == "time":
+         t = get_real_time()
+         injected_context = f"CURRENT DATE & TIME: {t}"
          status = "Checking Time..."
+
      elif intent == "search":
          status = "Searching Web..."
+         clean_query = last_msg_text.replace("search", "").replace("google", "").strip()
+         search_q = clean_query if len(clean_query) > 2 else last_msg_text
+
+         res = await perform_web_search(search_q)
+
          if res:
+             injected_context = (
+                 f"### SEARCH RESULTS (REAL-TIME DATA):\n{res}\n"
+                 "### INSTRUCTION:\n"
+                 "Answer the user's question using ONLY the Search Results above."
+             )
          else:
+             # Silent fallback if search fails
              injected_context = ""
              status = "Thinking..."

+     # === CONSTRUCT MESSAGE ===
      if raw_msgs[0].get("role") != "system":
          raw_msgs.insert(0, {"role":"system","content": sys_msg})
      else:
          raw_msgs[0]["content"] = sys_msg

      if injected_context:
+         new_content = (
+             f"{injected_context}\n\n"
+             f"### USER QUESTION:\n{last_msg_text}"
+         )
+         raw_msgs[-1]['content'] = new_content

+     if not selected_model:
+         if chat_model.model: selected_model = chat_model.model
+         elif coder_model.model: selected_model = coder_model.model
+         else: return {"error": "System warming up..."}

+     final_msgs = limit_context(raw_msgs)
+
+     def iter_response():
          try:
              yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
              yield ":\n\n"
+             stream = selected_model.create_chat_completion(
+                 messages=final_msgs, temperature=request.temperature, stream=True
+             )
              for chunk in stream:
+                 yield f"data: {json.dumps(sanitize_chunk(chunk))}\n\n"
              yield "data: [DONE]\n\n"
          except Exception as e:
+             logger.error(f"Stream error: {e}")
              yield f"data: {json.dumps({'error': str(e)})}\n\n"
+
+     return StreamingResponse(iter_response(), media_type="text/event-stream")
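The streaming contract of the rewritten endpoint matches the fallback path of the old code: one "event: status" frame, a ":" comment line as a keep-alive, then "data:" chunks filtered through sanitize_chunk, terminated by "data: [DONE]". A minimal client sketch follows; it assumes the server listens on http://localhost:8000 and that the requests package is installed (both assumptions, not part of the commit).

# Minimal SSE consumer for POST /v1/chat/completions (sketch; the base URL
# below is an assumption, not part of the commit).
import json
import requests

payload = {
    "messages": [{"role": "user", "content": "what time is it"}],
    "stream": True,
    "temperature": 0.7,
}

with requests.post("http://localhost:8000/v1/chat/completions", json=payload, stream=True) as resp:
    for raw in resp.iter_lines(decode_unicode=True):
        if not raw or raw.startswith(":"):      # skip blanks and keep-alive comments
            continue
        if raw.startswith("event:"):            # e.g. "event: status"
            print("[event]", raw.split(":", 1)[1].strip())
            continue
        if raw.startswith("data:"):
            data = raw.split(":", 1)[1].strip()
            if data == "[DONE]":
                break
            print(json.loads(data))             # dict produced by sanitize_chunk()

Any client that can iterate response lines works the same way; the only contract is the "event:"/"data:" prefixes and the "[DONE]" terminator.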