Nexari-Research committed on
Commit
ec58582
Β·
verified Β·
1 Parent(s): f936bed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -70
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py β€” Optimized Nexari G1 (Faster & Fixed Search)
2
  import os
3
  import json
4
  import logging
@@ -11,18 +11,17 @@ from pydantic import BaseModel
11
  from typing import Any, Dict, List
12
 
13
  # Local model modules
14
- # NOTE: router_model removed to save RAM as it was unused in logic
15
  import coder_model
16
  import chat_model
17
 
18
  # === SAFE IMPORT FOR NEW LIBRARIES ===
19
  try:
20
  from sentence_transformers import SentenceTransformer, util
21
- from duckduckgo_search import DDGS # Switched to Sync DDGS for stability
22
  NEURAL_AVAILABLE = True
23
  except ImportError:
24
  NEURAL_AVAILABLE = False
25
- print("⚠️ WARNING: sentence-transformers or duckduckgo-search not found. Running in Basic Mode.")
26
 
27
  logging.basicConfig(level=logging.INFO)
28
  logger = logging.getLogger("nexari.app")
@@ -42,7 +41,7 @@ INTENT_ANCHORS = {
42
  "coding": ["write python code", "fix bug", "create function", "script", "debug", "sql query", "html css"],
43
  "reasoning": ["solve math", "explain logic", "why", "prove that", "analyze", "physics"],
44
  "sad": ["i am sad", "depressed", "lonely", "feeling low", "heartbroken"],
45
- "search": ["search google", "who is", "latest news", "price of", "weather", "find info", "current status", "gold price"],
46
  "time": ["what time is it", "current time", "date", "clock", "day is today"]
47
  }
48
 
@@ -56,14 +55,11 @@ def ensure_model_dir_or_fail():
56
  # === LOADERS ===
57
 
58
  def load_neural_network():
59
- """Background loader for Neural Network."""
60
  global neural_classifier, encoded_anchors
61
- if not NEURAL_AVAILABLE:
62
- return
63
 
64
  try:
65
- logger.info("⏳ Loading Neural Model (in background)...")
66
- # CPU friendly loading
67
  model = SentenceTransformer(NEURAL_MODEL_NAME, cache_folder=NEURAL_DIR, device="cpu")
68
 
69
  anchors = {}
@@ -81,21 +77,17 @@ async def load_neural_async():
81
 
82
  @app.on_event("startup")
83
  async def startup_event():
84
- logger.info("Startup: Initializing Nexari G1 systems...")
85
  ensure_model_dir_or_fail()
86
-
87
  coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
88
  chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
89
 
90
- # Removed Router Model loading to save RAM and reduce Latency
91
  tasks = [
92
  asyncio.create_task(coder_model.load_model_async()),
93
  asyncio.create_task(chat_model.load_model_async()),
94
  asyncio.create_task(load_neural_async()),
95
  ]
96
-
97
  asyncio.gather(*tasks, return_exceptions=True)
98
- logger.info("πŸš€ Server Startup Complete (Models loading in background)")
99
 
100
  class Message(BaseModel):
101
  role: str
@@ -116,46 +108,43 @@ def get_real_time():
116
  return str(datetime.now())
117
 
118
  def search_sync(query: str):
119
- """Synchronous search execution to avoid AsyncDDGS loops issues."""
 
120
  try:
121
  with DDGS() as ddgs:
122
- # Using basic text search, lighter and more reliable
123
- results = list(ddgs.text(query, max_results=3))
124
  if not results:
125
- return "No results found on the web."
126
- formatted = "\n".join([f"- Title: {r['title']}\n Snippet: {r['body']}\n Link: {r['href']}" for r in results])
127
- return formatted
 
 
 
128
  except Exception as e:
129
- logger.error(f"DDGS Sync Search Error: {e}")
130
- return f"Search Error: {str(e)}"
131
 
132
  async def perform_web_search(query: str):
133
- if not NEURAL_AVAILABLE: return "Search unavailable (Lib missing)."
134
- logger.info(f"πŸ” Searching Web for: {query}")
135
- # Running sync code in a thread to keep FastAPI async happy
136
  return await asyncio.to_thread(search_sync, query)
137
 
138
  # === INTENT LOGIC ===
139
 
140
  def get_intent_neural(text: str):
141
- if not neural_classifier:
142
- return None
143
  try:
144
- # Quick check: If text is very short, skip neural overhead
145
  if len(text.split()) < 2: return "chat"
146
-
147
  user_embedding = neural_classifier.encode(text, convert_to_tensor=True)
148
  scores = {}
149
  for intent, anchor_embeddings in encoded_anchors.items():
150
  cosine_scores = util.cos_sim(user_embedding, anchor_embeddings)
151
  scores[intent] = float(cosine_scores.max())
152
-
153
  best = max(scores, key=scores.get)
154
- # Threshold adjusted for better accuracy
155
- if scores[best] < 0.30: return "chat"
156
  return best
157
  except Exception:
158
- return None
159
 
160
  def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
161
  if isinstance(chunk, dict):
@@ -164,11 +153,9 @@ def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
164
 
165
  SYSTEM_PREFIX = (
166
  "You are Nexari-G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
167
- "always understand the user behaviour and request. "
168
- "If the user gives a short or simple message, respond instantly without doing deep thinking. "
169
- "always add emoji according to the user question and behaviour. "
170
- "Your name is Nexari-G1. Always be helpful, honest, and clearly identify yourself as Nexari-G1 when appropriate. "
171
- "IMPORTANT: If Search Results are provided below, USE THEM to answer the user's question directly. Do not say you cannot search."
172
  )
173
 
174
  def limit_context(messages: List[Dict]) -> List[Dict]:
@@ -187,21 +174,27 @@ async def chat_endpoint(request: ChatRequest):
187
  raw_msgs = [m.dict() for m in request.messages] if request.messages else []
188
  if not raw_msgs: return {"error": "Empty messages"}
189
 
190
- last_msg = raw_msgs[-1]['content']
191
- intent = get_intent_neural(last_msg) or "chat"
 
192
 
193
- # Fallback keyword check (for when Neural is loading or misses)
194
- if intent == "chat" and len(last_msg) > 3:
195
- lower = last_msg.lower()
 
 
196
  if "time" in lower and ("what" in lower or "tell" in lower): intent = "time"
197
  elif "search" in lower or "google" in lower or "price" in lower or "news" in lower: intent = "search"
198
 
199
  selected_model = chat_model.model
200
  sys_msg = SYSTEM_PREFIX
201
- status = "Analyse Request"
202
- tool_context = ""
 
 
 
 
203
 
204
- # Routing Logic
205
  if intent == "coding" and getattr(coder_model, "model", None):
206
  selected_model = coder_model.model
207
  sys_msg += " You are an Expert Coder. Provide clean, working code."
@@ -209,47 +202,65 @@ async def chat_endpoint(request: ChatRequest):
209
 
210
  elif intent == "reasoning" and getattr(chat_model, "model", None):
211
  selected_model = chat_model.model
212
- sys_msg += " Think step-by-step to solve this."
213
  status = "Reasoning..."
214
 
215
- elif intent == "sad":
216
- status = "Empathizing..."
217
-
218
  elif intent == "time":
219
  t = get_real_time()
220
- tool_context = f"\n\n[SYSTEM UPDATE]: Current Date & Time is {t}. Tell this to the user."
 
221
  status = "Checking Time..."
222
 
223
  elif intent == "search":
224
  status = "Searching Web..."
225
- # Extract query: remove "search" keyword for better results if possible
226
- clean_query = last_msg.replace("search", "").replace("google", "").strip()
227
- search_q = clean_query if len(clean_query) > 3 else last_msg
228
 
229
  res = await perform_web_search(search_q)
230
- tool_context = f"\n\n[WEB SEARCH RESULTS]:\n{res}\n\n[INSTRUCTION]: Use the above search results to answer the user's question accurately."
231
-
232
- # Warmup check
233
- if not selected_model:
234
- # Fallback to whatever model is available if the selected one isn't ready
235
- if chat_model.model: selected_model = chat_model.model
236
- elif coder_model.model: selected_model = coder_model.model
237
- else: return {"error": "Nexari-G1 is warming up, please try again in 20 seconds."}
 
 
 
 
 
 
 
 
238
 
239
- # Construct Message
 
 
240
  if raw_msgs[0].get("role") != "system":
241
  raw_msgs.insert(0, {"role":"system","content": sys_msg})
242
  else:
243
  raw_msgs[0]["content"] = sys_msg
244
-
245
- if tool_context:
246
- # Add tool context to the SYSTEM message for better adherence
247
- raw_msgs[0]["content"] += tool_context
248
 
249
- final_msgs = limit_context(raw_msgs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
- logger.info(f"Intent: {intent} | Model: {'Coder' if selected_model == coder_model.model else 'Chat'}")
252
 
 
253
  def iter_response():
254
  try:
255
  yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
 
1
+ # app.py β€” Fixed RAG Context Injection (Solves Data Ignoring)
2
  import os
3
  import json
4
  import logging
 
11
  from typing import Any, Dict, List
12
 
13
  # Local model modules
 
14
  import coder_model
15
  import chat_model
16
 
17
  # === SAFE IMPORT FOR NEW LIBRARIES ===
18
  try:
19
  from sentence_transformers import SentenceTransformer, util
20
+ from duckduckgo_search import DDGS # Sync DDGS is more reliable
21
  NEURAL_AVAILABLE = True
22
  except ImportError:
23
  NEURAL_AVAILABLE = False
24
+ print("⚠️ WARNING: sentence-transformers or duckduckgo-search not found.")
25
 
26
  logging.basicConfig(level=logging.INFO)
27
  logger = logging.getLogger("nexari.app")
 
41
  "coding": ["write python code", "fix bug", "create function", "script", "debug", "sql query", "html css"],
42
  "reasoning": ["solve math", "explain logic", "why", "prove that", "analyze", "physics"],
43
  "sad": ["i am sad", "depressed", "lonely", "feeling low", "heartbroken"],
44
+ "search": ["search google", "who is", "latest news", "price of", "weather", "find info", "current status", "gold price", "stock price"],
45
  "time": ["what time is it", "current time", "date", "clock", "day is today"]
46
  }
47
 
 
55
  # === LOADERS ===
56
 
57
  def load_neural_network():
 
58
  global neural_classifier, encoded_anchors
59
+ if not NEURAL_AVAILABLE: return
 
60
 
61
  try:
62
+ logger.info("⏳ Loading Neural Model...")
 
63
  model = SentenceTransformer(NEURAL_MODEL_NAME, cache_folder=NEURAL_DIR, device="cpu")
64
 
65
  anchors = {}
 
77
 
78
  @app.on_event("startup")
79
  async def startup_event():
 
80
  ensure_model_dir_or_fail()
 
81
  coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
82
  chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
83
 
 
84
  tasks = [
85
  asyncio.create_task(coder_model.load_model_async()),
86
  asyncio.create_task(chat_model.load_model_async()),
87
  asyncio.create_task(load_neural_async()),
88
  ]
 
89
  asyncio.gather(*tasks, return_exceptions=True)
90
+ logger.info("πŸš€ Server Startup Complete")
91
 
92
  class Message(BaseModel):
93
  role: str
 
108
  return str(datetime.now())
109
 
110
  def search_sync(query: str):
111
+ """Robust Sync Search with Retry Logic"""
112
+ logger.info(f"πŸ”Ž Executing Search for: {query}")
113
  try:
114
  with DDGS() as ddgs:
115
+ # max_results increased to 4 for better context
116
+ results = list(ddgs.text(query, max_results=4))
117
  if not results:
118
+ return None
119
+
120
+ formatted_res = ""
121
+ for r in results:
122
+ formatted_res += f"Source: {r['title']}\nSnippet: {r['body']}\nLink: {r['href']}\n\n"
123
+ return formatted_res
124
  except Exception as e:
125
+ logger.error(f"DDGS Error: {e}")
126
+ return None
127
 
128
  async def perform_web_search(query: str):
129
+ if not NEURAL_AVAILABLE: return None
 
 
130
  return await asyncio.to_thread(search_sync, query)
131
 
132
  # === INTENT LOGIC ===
133
 
134
  def get_intent_neural(text: str):
135
+ if not neural_classifier: return "chat"
 
136
  try:
 
137
  if len(text.split()) < 2: return "chat"
 
138
  user_embedding = neural_classifier.encode(text, convert_to_tensor=True)
139
  scores = {}
140
  for intent, anchor_embeddings in encoded_anchors.items():
141
  cosine_scores = util.cos_sim(user_embedding, anchor_embeddings)
142
  scores[intent] = float(cosine_scores.max())
 
143
  best = max(scores, key=scores.get)
144
+ if scores[best] < 0.28: return "chat"
 
145
  return best
146
  except Exception:
147
+ return "chat"
148
 
149
  def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
150
  if isinstance(chunk, dict):
 
153
 
154
  SYSTEM_PREFIX = (
155
  "You are Nexari-G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
156
+ "Your goal is to provide accurate, helpful, and concise answers. "
157
+ "Always identify yourself as Nexari-G1. "
158
+ "Use emojis to make the conversation lively. "
 
 
159
  )
160
 
161
  def limit_context(messages: List[Dict]) -> List[Dict]:
 
174
  raw_msgs = [m.dict() for m in request.messages] if request.messages else []
175
  if not raw_msgs: return {"error": "Empty messages"}
176
 
177
+ # 1. Capture User's Last Message
178
+ last_user_msg_obj = raw_msgs[-1]
179
+ last_msg_text = last_user_msg_obj['content']
180
 
181
+ intent = get_intent_neural(last_msg_text) or "chat"
182
+
183
+ # Fallback keyword check
184
+ if intent == "chat" and len(last_msg_text) > 3:
185
+ lower = last_msg_text.lower()
186
  if "time" in lower and ("what" in lower or "tell" in lower): intent = "time"
187
  elif "search" in lower or "google" in lower or "price" in lower or "news" in lower: intent = "search"
188
 
189
  selected_model = chat_model.model
190
  sys_msg = SYSTEM_PREFIX
191
+ status = "Thinking..."
192
+
193
+ # Context Injection Variables
194
+ injected_context = ""
195
+
196
+ # === ROUTING & TOOL EXECUTION ===
197
 
 
198
  if intent == "coding" and getattr(coder_model, "model", None):
199
  selected_model = coder_model.model
200
  sys_msg += " You are an Expert Coder. Provide clean, working code."
 
202
 
203
  elif intent == "reasoning" and getattr(chat_model, "model", None):
204
  selected_model = chat_model.model
205
+ sys_msg += " Think step-by-step."
206
  status = "Reasoning..."
207
 
 
 
 
208
  elif intent == "time":
209
  t = get_real_time()
210
+ # Inject directly into user prompt for high attention
211
+ injected_context = f"CURRENT DATE & TIME: {t}"
212
  status = "Checking Time..."
213
 
214
  elif intent == "search":
215
  status = "Searching Web..."
216
+ clean_query = last_msg_text.replace("search", "").replace("google", "").strip()
217
+ search_q = clean_query if len(clean_query) > 2 else last_msg_text
 
218
 
219
  res = await perform_web_search(search_q)
220
+
221
+ if res:
222
+ # STRONG INJECTION PATTERN
223
+ injected_context = (
224
+ f"### SEARCH RESULTS (REAL-TIME DATA):\n{res}\n"
225
+ "### INSTRUCTION:\n"
226
+ "Answer the user's question using ONLY the Search Results above. "
227
+ "Do NOT use your internal training data if it conflicts. "
228
+ "If the price or data is in the results, state it clearly."
229
+ )
230
+ else:
231
+ injected_context = (
232
+ "### SYSTEM NOTE:\n"
233
+ "Attempted to search the web but found no results. "
234
+ "Please politely inform the user that you couldn't find current info."
235
+ )
236
 
237
+ # === MESSAGE CONSTRUCTION ===
238
+
239
+ # 1. Set System Prompt
240
  if raw_msgs[0].get("role") != "system":
241
  raw_msgs.insert(0, {"role":"system","content": sys_msg})
242
  else:
243
  raw_msgs[0]["content"] = sys_msg
 
 
 
 
244
 
245
+ # 2. INJECT CONTEXT INTO LAST USER MESSAGE (Crucial Fix)
246
+ # This forces the model to see the context immediately before generating the answer.
247
+ if injected_context:
248
+ new_content = (
249
+ f"{injected_context}\n\n"
250
+ f"### USER QUESTION:\n{last_msg_text}"
251
+ )
252
+ # Update the last message in the list
253
+ raw_msgs[-1]['content'] = new_content
254
+
255
+ # 3. Model Warmup Check
256
+ if not selected_model:
257
+ if chat_model.model: selected_model = chat_model.model
258
+ elif coder_model.model: selected_model = coder_model.model
259
+ else: return {"error": "System warming up..."}
260
 
261
+ final_msgs = limit_context(raw_msgs)
262
 
263
+ # Streamer
264
  def iter_response():
265
  try:
266
  yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"