Nexari-Research committed
Commit af8ad70 · verified · 1 Parent(s): 0aff05c

Update app.py

Files changed (1)
  1. app.py +127 -135
app.py CHANGED
@@ -1,4 +1,4 @@
- # app.py — Optimized for Speed + Original System Instructions Restored
  import os
  import json
  import logging
@@ -8,7 +8,10 @@ from fastapi.responses import StreamingResponse
  from pydantic import BaseModel
  from typing import Any, Dict, List

- # Local model modules (expect these to exist in your project)
  import router_model
  import coder_model
  import chat_model
@@ -19,14 +22,33 @@ logger = logging.getLogger("nexari.app")
  app = FastAPI()

  MODEL_DIR = "./models"
- # Limit history to keep CPU processing fast (System + 6 recent messages)
- # Response time will not increase no matter how long the chat gets.
  MAX_HISTORY_MESSAGES = 6

  def ensure_model_dir_or_fail():
      try:
          os.makedirs(MODEL_DIR, exist_ok=True)
-         logger.info("Model directory ensured: %s", MODEL_DIR)
      except Exception as e:
          logger.critical("Unable to create model dir: %s", e)
          raise
@@ -40,15 +62,26 @@ async def startup_event():
      coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
      chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")

      tasks = [
          asyncio.create_task(router_model.load_model_async()),
          asyncio.create_task(coder_model.load_model_async()),
          asyncio.create_task(chat_model.load_model_async()),
      ]
-     results = await asyncio.gather(*tasks, return_exceptions=True)
-     for i, r in enumerate(results):
-         if isinstance(r, Exception):
-             logger.error("Model loader %d failed: %s", i, r)
      logger.info("Startup complete.")

  class Message(BaseModel):
@@ -60,19 +93,61 @@ class ChatRequest(BaseModel):
      stream: bool = True
      temperature: float = 0.7

  def get_intent(last_user_message: str):
-     # Optimization: Short messages skip the router to save 2-3 seconds
-     if len(last_user_message) < 15:
          return "chat", "neutral"

-     # If router model missing, use a simple rule
      if not getattr(router_model, "model", None):
-         text = (last_user_message or "").lower()
-         if any(tok in text for tok in ["code", "bug", "fix", "error", "function", "python", "js", "html", "css"]):
-             return "coding", "neutral"
-         if any(tok in text for tok in ["why", "how", "prove", "reason", "think"]):
-             return "reasoning", "neutral"
-         return "chat", "neutral"

      sys_prompt = "Analyze intent. Return JSON like {'intent':'coding'|'chat'|'reasoning', 'sentiment':'neutral'|'sad'}"
      try:
@@ -80,54 +155,25 @@ def get_intent(last_user_message: str):
              messages=[{"role":"system","content":sys_prompt},{"role":"user","content": last_user_message}],
              temperature=0.1, max_tokens=50
          )
-         content = ""
-         try:
-             content = res['choices'][0]['message']['content'].lower()
-         except Exception:
-             try:
-                 content = res['choices'][0]['text'].lower()
-             except Exception:
-                 content = ""
-         if "coding" in content:
-             return "coding", "neutral"
-         if "reasoning" in content or "think" in content or "solve" in content:
-             return "reasoning", "neutral"
-         if "sad" in content:
-             return "chat", "sad"
          return "chat", "neutral"
-     except Exception as e:
-         logger.exception("Router failure: %s", e)
          return "chat", "neutral"

  def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
-     """
-     Ensure chunk is a JSON-serializable mapping for SSE.
-     Remove any 'status' fields so we never send an unintended status overwrite.
-     """
      if isinstance(chunk, dict):
          out = {}
          for k, v in chunk.items():
-             if k == "status":
-                 logger.debug("Dropping status field from model chunk: %s", v)
-                 continue
-             if isinstance(v, (str, int, float, bool, type(None))):
-                 out[k] = v
-             else:
-                 try:
-                     json.dumps(v)
-                     out[k] = v
-                 except Exception:
-                     out[k] = str(v)
          return out
-     else:
-         try:
-             txt = str(chunk)
-             return {"text": txt}
-         except Exception:
-             return {"text": "[UNSERIALIZABLE_CHUNK]"}

- # === RESTORED ORIGINAL SYSTEM INSTRUCTIONS ===
- # Static system identity prefix to include in system prompts:
  SYSTEM_IDENTITY_PREFIX = (
      "You are Nexari-G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
      "always understand the user behaviour and request. "
@@ -137,73 +183,49 @@ SYSTEM_IDENTITY_PREFIX = (
  )

  def limit_context(messages: List[Dict]) -> List[Dict]:
-     """
-     Smart Context Trimming (Speed Fix):
-     1. Always keep the SYSTEM prompt (Identity/Instructions).
-     2. Keep only the last MAX_HISTORY_MESSAGES.
-     This ensures processing time stays fast even after 100 turns.
-     """
-     if not messages:
-         return []

-     system_msg = None
-     chat_history = []
-
-     # Separate system message
-     if messages[0].get("role") == "system":
-         system_msg = messages[0]
-         remaining = messages[1:]
-     else:
-         remaining = messages
-
-     # Keep only the last N messages
      if len(remaining) > MAX_HISTORY_MESSAGES:
-         chat_history = remaining[-MAX_HISTORY_MESSAGES:]
-     else:
-         chat_history = remaining
-
-     # Reconstruct
      final_msgs = []
-     if system_msg:
-         final_msgs.append(system_msg)
-     final_msgs.extend(chat_history)
-
      return final_msgs

  @app.post("/v1/chat/completions")
  async def chat_endpoint(request: ChatRequest):
-     # Validate incoming
      raw_messages = [m.dict() for m in request.messages] if request.messages else []
-     if not raw_messages:
-         return {"error": "No messages provided."}
      last = raw_messages[-1]['content']

      intent, sentiment = get_intent(last)

      selected_model = None
-     # base system message will always include identity prefix
      sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a helpful assistant."
-     status_indicator = "Thinking..."  # default if not changed below

      if intent == "coding":
          if not getattr(coder_model, "model", None):
-             logger.error("Coder model not available.")
              return {"error":"Coder model not available."}
          selected_model = coder_model.model
-         sys_msg = SYSTEM_IDENTITY_PREFIX + "You are an expert Coding Assistant. Write clean, efficient code with comments where helpful."
-         status_indicator = "Coding..."
-         logger.info("Intent: CODING")
      elif intent == "reasoning":
          if not getattr(chat_model, "model", None):
-             logger.error("Chat model not available for reasoning.")
              return {"error":"Model not available."}
          selected_model = chat_model.model
-         sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a reasoning-focused assistant. Walk through your thinking clearly and show steps if relevant."
-         status_indicator = "Reasoning..."
-         logger.info("Intent: REASONING")
      else:
          if not getattr(chat_model, "model", None):
-             logger.error("Chat model missing.")
              return {"error":"Chat model not available."}
          selected_model = chat_model.model
          logger.info("Intent: CHAT (%s)", sentiment)
@@ -211,60 +233,30 @@ async def chat_endpoint(request: ChatRequest):
              sys_msg = SYSTEM_IDENTITY_PREFIX + "You are empathic and calm. Provide supportive, concise responses."
              status_indicator = "Empathizing..."
          else:
-             # default chat system message with identity included
              sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a helpful conversational assistant."

-     # ensure system prompt is present (first message)
      if raw_messages[0].get("role") != "system":
          raw_messages.insert(0, {"role":"system","content": sys_msg})
      else:
-         # replace existing system content to ensure identity is present and consistent
          raw_messages[0]["content"] = sys_msg

-     # === APPLY OPTIMIZATION: TRIM CONTEXT ===
-     # This line was added so the model only processes relevant history and stays fast.
      optimized_messages = limit_context(raw_messages)

-     # Streaming generator
      def iter_response():
          try:
-             # 1) Send a single authoritative SSE status event (event: status)
-             status_payload = json.dumps({"status": status_indicator})
-             event_payload = f"event: status\n"
-             event_payload += f"data: {status_payload}\n\n"
-             logger.info("Sending authoritative status event: %s", status_indicator)
-             yield event_payload
-
-             # 2) small flush hint
              yield ":\n\n"
-
-             # 3) Start streaming model output
              stream = selected_model.create_chat_completion(
-                 messages=optimized_messages,  # USING OPTIMIZED MESSAGES
                  temperature=request.temperature,
                  stream=True
              )
-
-             # Iterate model generator and sanitize every chunk
              for chunk in stream:
-                 safe = sanitize_chunk(chunk)
-                 try:
-                     yield f"data: {json.dumps(safe)}\n\n"
-                 except Exception:
-                     # fallback to a safe string representation
-                     yield f"data: {json.dumps({'text': str(safe)})}\n\n"
-
-             # 4) final done marker
              yield "data: [DONE]\n\n"
-             logger.info("Stream finished for request (status was: %s)", status_indicator)
-
          except Exception as e:
-             logger.exception("Streaming error: %s", e)
-             # send explicit error object
-             try:
-                 yield f"data: {json.dumps({'error': str(e)})}\n\n"
-             except Exception:
-                 yield "data: {\"error\":\"streaming failure\"}\n\n"
-             yield "data: [DONE]\n\n"

      return StreamingResponse(iter_response(), media_type="text/event-stream")
 
+ # app.py — Hybrid Neural Router + Optimized Performance + Original Identity
  import os
  import json
  import logging

  from pydantic import BaseModel
  from typing import Any, Dict, List

+ # New Neural Network Library
+ from sentence_transformers import SentenceTransformer, util
+
+ # Local model modules
  import router_model
  import coder_model
  import chat_model

  app = FastAPI()

  MODEL_DIR = "./models"
  MAX_HISTORY_MESSAGES = 6

+ # === NEURAL NETWORK CONFIGURATION ===
+ # A lightweight, high-speed embedding model with a strong speed/accuracy trade-off
+ NEURAL_MODEL_NAME = "all-MiniLM-L6-v2"
+ neural_classifier = None
+
+ # "Anchors" define the center of gravity for each intent in the embedding space
+ INTENT_ANCHORS = {
+     "coding": [
+         "write python code", "fix this bug", "create a function", "html css script",
+         "debug this error", "generate java code", "react component", "sql query"
+     ],
+     "reasoning": [
+         "solve this math problem", "explain the logic", "why does this happen",
+         "prove that", "step by step reasoning", "analyze this complex topic"
+     ],
+     "sad": [
+         "i am feeling sad", "i am depressed", "life is hard", "i am lonely",
+         "i feel like crying", "everything is going wrong"
+     ]
+ }
+ encoded_anchors = {}
+
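To see the anchor technique in isolation: each intent is represented by a handful of example phrases, an incoming message is embedded once, and it is routed to the intent whose closest anchor clears the similarity threshold. A minimal self-contained sketch of that idea (not part of app.py; anchor lists abbreviated, and the expected outputs are illustrative, not measured):

```python
# Standalone sketch of anchor-based routing, mirroring the configuration above.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")
anchors = {
    "coding": ["write python code", "fix this bug"],
    "reasoning": ["solve this math problem", "explain the logic"],
}
# Encode anchors once up front, as the startup hook below does.
encoded = {k: model.encode(v, convert_to_tensor=True) for k, v in anchors.items()}

def classify(text: str, threshold: float = 0.35) -> str:
    query = model.encode(text, convert_to_tensor=True)
    # Best cosine similarity against any anchor phrase, per intent.
    scores = {k: float(util.cos_sim(query, emb).max()) for k, emb in encoded.items()}
    best = max(scores, key=scores.get)
    return best if scores[best] >= threshold else "chat"

print(classify("my react component keeps crashing"))  # expected: "coding"
print(classify("tell me a joke"))                      # expected: "chat"
```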
  def ensure_model_dir_or_fail():
      try:
          os.makedirs(MODEL_DIR, exist_ok=True)
      except Exception as e:
          logger.critical("Unable to create model dir: %s", e)
          raise

      coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
      chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")

+     # Load LLMs asynchronously
      tasks = [
          asyncio.create_task(router_model.load_model_async()),
          asyncio.create_task(coder_model.load_model_async()),
          asyncio.create_task(chat_model.load_model_async()),
      ]
+
+     # Load Neural Network Classifier (runs on CPU, very fast)
+     global neural_classifier, encoded_anchors
+     try:
+         logger.info("Loading Neural Intent Classifier...")
+         neural_classifier = SentenceTransformer(NEURAL_MODEL_NAME)
+         # Pre-calculate anchor vectors so we don't redo this on every request
+         for intent, texts in INTENT_ANCHORS.items():
+             encoded_anchors[intent] = neural_classifier.encode(texts, convert_to_tensor=True)
+         logger.info("Neural Intent Classifier Ready.")
+     except Exception as e:
+         logger.error(f"Failed to load Neural Classifier: {e}")
+
+     await asyncio.gather(*tasks, return_exceptions=True)
      logger.info("Startup complete.")

  class Message(BaseModel):

      stream: bool = True
      temperature: float = 0.7

+ def get_intent_neural(text: str):
+     """
+     Uses vector embeddings & cosine similarity to detect intent.
+     This is the "neural function" that fronts the router.
+     """
+     if not neural_classifier:
+         return None, None
+
+     try:
+         # 1. Convert the user text to a vector
+         user_embedding = neural_classifier.encode(text, convert_to_tensor=True)
+
+         scores = {}
+         # 2. Compare against all anchor vectors (cosine similarity)
+         for intent, anchor_embeddings in encoded_anchors.items():
+             # Max similarity with any anchor phrase in this category
+             cosine_scores = util.cos_sim(user_embedding, anchor_embeddings)
+             best_score = float(cosine_scores.max())
+             scores[intent] = best_score
+
+         # Find the winner
+         best_intent = max(scores, key=scores.get)
+         confidence = scores[best_intent]
+
+         logger.info(f"Neural Intent Analysis: {scores} -> Winner: {best_intent}")
+
+         # Threshold check: if confidence is low (< 0.35), treat as general chat
+         if confidence < 0.35:
+             return "chat", "neutral"
+
+         if best_intent == "coding": return "coding", "neutral"
+         if best_intent == "reasoning": return "reasoning", "neutral"
+         if best_intent == "sad": return "chat", "sad"
+
+         return "chat", "neutral"
+     except Exception as e:
+         logger.error(f"Neural Check Failed: {e}")
+         return None, None
+
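The (None, None) return is the contract that lets the caller fall through to the generative router. A quick way to sanity-check that degradation path, as a hypothetical pytest sketch (the import name `app` is an assumption, not part of this commit):

```python
# Hypothetical pytest sketch; `app` is an assumed import name for this module.
import app

def test_neural_stage_signals_inconclusive(monkeypatch):
    # With no classifier loaded, the neural stage must not guess; it returns
    # (None, None) so get_intent() can fall back to the generative router.
    monkeypatch.setattr(app, "neural_classifier", None)
    assert app.get_intent_neural("fix this bug") == (None, None)
```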
  def get_intent(last_user_message: str):
+     # 1. Ultra-fast length check (legacy)
+     # Short-circuit for very short messages
+     if len(last_user_message) < 5:
          return "chat", "neutral"

+     # 2. NEURAL NETWORK CHECK (the upgrade)
+     # This understands meaning, not just keywords.
+     neural_intent, neural_sentiment = get_intent_neural(last_user_message)
+     if neural_intent:
+         return neural_intent, neural_sentiment
+
+     # 3. Fallback to the generative router
+     # Only runs if the neural check was inconclusive or the library failed
      if not getattr(router_model, "model", None):
+         return "chat", "neutral"

      sys_prompt = "Analyze intent. Return JSON like {'intent':'coding'|'chat'|'reasoning', 'sentiment':'neutral'|'sad'}"
      try:

              messages=[{"role":"system","content":sys_prompt},{"role":"user","content": last_user_message}],
              temperature=0.1, max_tokens=50
          )
+         content = res['choices'][0]['message']['content'].lower()
+
+         if "coding" in content: return "coding", "neutral"
+         if "reasoning" in content: return "reasoning", "neutral"
+         if "sad" in content: return "chat", "sad"
          return "chat", "neutral"
+     except Exception:
          return "chat", "neutral"

  def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
      if isinstance(chunk, dict):
          out = {}
          for k, v in chunk.items():
+             if k == "status": continue
+             out[k] = v
          return out
+     return {"text": str(chunk)}
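The slimmed-down sanitizer keeps the one invariant that matters for the SSE protocol: model chunks can never carry a `status` field that would overwrite the authoritative status event. A quick illustration with hypothetical chunk values:

```python
# Hypothetical inputs: "status" keys are dropped, non-dict chunks are wrapped.
assert sanitize_chunk({"status": "Thinking...", "text": "hi"}) == {"text": "hi"}
assert sanitize_chunk("raw chunk") == {"text": "raw chunk"}
```

Note that, unlike the removed version, values are no longer individually checked for JSON serializability; a non-serializable value now surfaces as a `json.dumps` failure in the streaming loop and is caught by the generator's except handler.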
 
+ # === ORIGINAL SYSTEM INSTRUCTIONS ===
  SYSTEM_IDENTITY_PREFIX = (
      "You are Nexari-G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
      "always understand the user behaviour and request. "

  )

  def limit_context(messages: List[Dict]) -> List[Dict]:
+     if not messages: return []
+     system_msg = messages[0] if messages[0].get("role") == "system" else None
+     start_idx = 1 if system_msg else 0
+     remaining = messages[start_idx:]

+     # Smart trimming
      if len(remaining) > MAX_HISTORY_MESSAGES:
+         remaining = remaining[-MAX_HISTORY_MESSAGES:]
+
      final_msgs = []
+     if system_msg: final_msgs.append(system_msg)
+     final_msgs.extend(remaining)
      return final_msgs
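The trim is easiest to see with dummy data: given a system message plus ten turns, only the system message and the last MAX_HISTORY_MESSAGES turns survive. A hypothetical demo, assuming limit_context and the module-level MAX_HISTORY_MESSAGES = 6 are in scope:

```python
# Hypothetical demo of limit_context with MAX_HISTORY_MESSAGES = 6.
msgs = [{"role": "system", "content": "identity"}] + [
    {"role": "user", "content": f"turn {i}"} for i in range(10)
]
trimmed = limit_context(msgs)
assert trimmed[0]["role"] == "system"   # system prompt always survives
assert len(trimmed) == 7                # system + last 6 turns
assert trimmed[-1]["content"] == "turn 9"
```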

  @app.post("/v1/chat/completions")
  async def chat_endpoint(request: ChatRequest):
      raw_messages = [m.dict() for m in request.messages] if request.messages else []
+     if not raw_messages: return {"error": "No messages provided."}
      last = raw_messages[-1]['content']

+     # Get intent using the new neural pipeline
      intent, sentiment = get_intent(last)

      selected_model = None
      sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a helpful assistant."
+     status_indicator = "Thinking..."

      if intent == "coding":
          if not getattr(coder_model, "model", None):
              return {"error":"Coder model not available."}
          selected_model = coder_model.model
+         sys_msg = SYSTEM_IDENTITY_PREFIX + "You are an expert Coding Assistant. Write clean, efficient code with comments."
+         status_indicator = "Coding (Neural Mode)..."
+         logger.info("Intent: CODING (Neural)")
      elif intent == "reasoning":
          if not getattr(chat_model, "model", None):
              return {"error":"Model not available."}
          selected_model = chat_model.model
+         sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a reasoning-focused assistant. Walk through your thinking clearly."
+         status_indicator = "Reasoning (Neural Mode)..."
+         logger.info("Intent: REASONING (Neural)")
      else:
          if not getattr(chat_model, "model", None):
              return {"error":"Chat model not available."}
          selected_model = chat_model.model
          logger.info("Intent: CHAT (%s)", sentiment)

              sys_msg = SYSTEM_IDENTITY_PREFIX + "You are empathic and calm. Provide supportive, concise responses."
              status_indicator = "Empathizing..."
          else:
              sys_msg = SYSTEM_IDENTITY_PREFIX + "You are a helpful conversational assistant."

      if raw_messages[0].get("role") != "system":
          raw_messages.insert(0, {"role":"system","content": sys_msg})
      else:
          raw_messages[0]["content"] = sys_msg

      optimized_messages = limit_context(raw_messages)

      def iter_response():
          try:
+             yield f"event: status\ndata: {json.dumps({'status': status_indicator})}\n\n"
              yield ":\n\n"
+
              stream = selected_model.create_chat_completion(
+                 messages=optimized_messages,
                  temperature=request.temperature,
                  stream=True
              )
              for chunk in stream:
+                 yield f"data: {json.dumps(sanitize_chunk(chunk))}\n\n"
              yield "data: [DONE]\n\n"
          except Exception as e:
+             logger.exception("Stream Error: %s", e)
+             yield f"data: {json.dumps({'error': str(e)})}\n\n"

      return StreamingResponse(iter_response(), media_type="text/event-stream")
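To exercise the endpoint end to end, a minimal SSE client can watch for the one authoritative `event: status` line, print `data:` chunks as they arrive, and stop at the `[DONE]` marker. A hedged sketch for manual testing (the host/port and the OpenAI-style `choices[0]["delta"]` chunk shape are assumptions, not guaranteed by this commit):

```python
# Hypothetical client for the endpoint above (not part of app.py).
import json
import requests

payload = {"messages": [{"role": "user", "content": "fix this python bug"}]}
current_event = None

with requests.post("http://localhost:8000/v1/chat/completions",
                   json=payload, stream=True) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or line.startswith(":"):      # skip blanks and flush hints
            continue
        if line.startswith("event:"):
            current_event = line.split(":", 1)[1].strip()
            continue
        if line.startswith("data:"):
            data = line.split(":", 1)[1].strip()
            if data == "[DONE]":
                break
            if current_event == "status":          # the authoritative status event
                print(f"[{json.loads(data)['status']}]")
                current_event = None
                continue
            # Assumes OpenAI-style streaming chunks; error chunks print nothing.
            delta = json.loads(data).get("choices", [{}])[0].get("delta", {})
            print(delta.get("content", ""), end="", flush=True)
```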