Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# app.py β
|
| 2 |
import os
|
| 3 |
import json
|
| 4 |
import logging
|
|
@@ -11,18 +11,17 @@ from pydantic import BaseModel
|
|
| 11 |
from typing import Any, Dict, List
|
| 12 |
|
| 13 |
# Local model modules
|
| 14 |
-
# NOTE: router_model removed to save RAM as it was unused in logic
|
| 15 |
import coder_model
|
| 16 |
import chat_model
|
| 17 |
|
| 18 |
# === SAFE IMPORT FOR NEW LIBRARIES ===
|
| 19 |
try:
|
| 20 |
from sentence_transformers import SentenceTransformer, util
|
| 21 |
-
from duckduckgo_search import DDGS #
|
| 22 |
NEURAL_AVAILABLE = True
|
| 23 |
except ImportError:
|
| 24 |
NEURAL_AVAILABLE = False
|
| 25 |
-
print("β οΈ WARNING: sentence-transformers or duckduckgo-search not found.
|
| 26 |
|
| 27 |
logging.basicConfig(level=logging.INFO)
|
| 28 |
logger = logging.getLogger("nexari.app")
|
|
@@ -42,7 +41,7 @@ INTENT_ANCHORS = {
|
|
| 42 |
"coding": ["write python code", "fix bug", "create function", "script", "debug", "sql query", "html css"],
|
| 43 |
"reasoning": ["solve math", "explain logic", "why", "prove that", "analyze", "physics"],
|
| 44 |
"sad": ["i am sad", "depressed", "lonely", "feeling low", "heartbroken"],
|
| 45 |
-
"search": ["search google", "who is", "latest news", "price of", "weather", "find info", "current status", "gold price"],
|
| 46 |
"time": ["what time is it", "current time", "date", "clock", "day is today"]
|
| 47 |
}
|
| 48 |
|
|
@@ -56,14 +55,11 @@ def ensure_model_dir_or_fail():
|
|
| 56 |
# === LOADERS ===
|
| 57 |
|
| 58 |
def load_neural_network():
|
| 59 |
-
"""Background loader for Neural Network."""
|
| 60 |
global neural_classifier, encoded_anchors
|
| 61 |
-
if not NEURAL_AVAILABLE:
|
| 62 |
-
return
|
| 63 |
|
| 64 |
try:
|
| 65 |
-
logger.info("β³ Loading Neural Model
|
| 66 |
-
# CPU friendly loading
|
| 67 |
model = SentenceTransformer(NEURAL_MODEL_NAME, cache_folder=NEURAL_DIR, device="cpu")
|
| 68 |
|
| 69 |
anchors = {}
|
|
@@ -81,21 +77,17 @@ async def load_neural_async():
|
|
| 81 |
|
| 82 |
@app.on_event("startup")
|
| 83 |
async def startup_event():
|
| 84 |
-
logger.info("Startup: Initializing Nexari G1 systems...")
|
| 85 |
ensure_model_dir_or_fail()
|
| 86 |
-
|
| 87 |
coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
|
| 88 |
chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
|
| 89 |
|
| 90 |
-
# Removed Router Model loading to save RAM and reduce Latency
|
| 91 |
tasks = [
|
| 92 |
asyncio.create_task(coder_model.load_model_async()),
|
| 93 |
asyncio.create_task(chat_model.load_model_async()),
|
| 94 |
asyncio.create_task(load_neural_async()),
|
| 95 |
]
|
| 96 |
-
|
| 97 |
asyncio.gather(*tasks, return_exceptions=True)
|
| 98 |
-
logger.info("π Server Startup Complete
|
| 99 |
|
| 100 |
class Message(BaseModel):
|
| 101 |
role: str
|
|
@@ -116,46 +108,43 @@ def get_real_time():
|
|
| 116 |
return str(datetime.now())
|
| 117 |
|
| 118 |
def search_sync(query: str):
|
| 119 |
-
"""
|
|
|
|
| 120 |
try:
|
| 121 |
with DDGS() as ddgs:
|
| 122 |
-
#
|
| 123 |
-
results = list(ddgs.text(query, max_results=
|
| 124 |
if not results:
|
| 125 |
-
return
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
| 128 |
except Exception as e:
|
| 129 |
-
logger.error(f"DDGS
|
| 130 |
-
return
|
| 131 |
|
| 132 |
async def perform_web_search(query: str):
|
| 133 |
-
if not NEURAL_AVAILABLE: return
|
| 134 |
-
logger.info(f"π Searching Web for: {query}")
|
| 135 |
-
# Running sync code in a thread to keep FastAPI async happy
|
| 136 |
return await asyncio.to_thread(search_sync, query)
|
| 137 |
|
| 138 |
# === INTENT LOGIC ===
|
| 139 |
|
| 140 |
def get_intent_neural(text: str):
|
| 141 |
-
if not neural_classifier:
|
| 142 |
-
return None
|
| 143 |
try:
|
| 144 |
-
# Quick check: If text is very short, skip neural overhead
|
| 145 |
if len(text.split()) < 2: return "chat"
|
| 146 |
-
|
| 147 |
user_embedding = neural_classifier.encode(text, convert_to_tensor=True)
|
| 148 |
scores = {}
|
| 149 |
for intent, anchor_embeddings in encoded_anchors.items():
|
| 150 |
cosine_scores = util.cos_sim(user_embedding, anchor_embeddings)
|
| 151 |
scores[intent] = float(cosine_scores.max())
|
| 152 |
-
|
| 153 |
best = max(scores, key=scores.get)
|
| 154 |
-
|
| 155 |
-
if scores[best] < 0.30: return "chat"
|
| 156 |
return best
|
| 157 |
except Exception:
|
| 158 |
-
return
|
| 159 |
|
| 160 |
def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
|
| 161 |
if isinstance(chunk, dict):
|
|
@@ -164,11 +153,9 @@ def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
|
|
| 164 |
|
| 165 |
SYSTEM_PREFIX = (
|
| 166 |
"You are Nexari-G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
|
| 167 |
-
"
|
| 168 |
-
"
|
| 169 |
-
"
|
| 170 |
-
"Your name is Nexari-G1. Always be helpful, honest, and clearly identify yourself as Nexari-G1 when appropriate. "
|
| 171 |
-
"IMPORTANT: If Search Results are provided below, USE THEM to answer the user's question directly. Do not say you cannot search."
|
| 172 |
)
|
| 173 |
|
| 174 |
def limit_context(messages: List[Dict]) -> List[Dict]:
|
|
@@ -187,21 +174,27 @@ async def chat_endpoint(request: ChatRequest):
|
|
| 187 |
raw_msgs = [m.dict() for m in request.messages] if request.messages else []
|
| 188 |
if not raw_msgs: return {"error": "Empty messages"}
|
| 189 |
|
| 190 |
-
|
| 191 |
-
|
|
|
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
| 196 |
if "time" in lower and ("what" in lower or "tell" in lower): intent = "time"
|
| 197 |
elif "search" in lower or "google" in lower or "price" in lower or "news" in lower: intent = "search"
|
| 198 |
|
| 199 |
selected_model = chat_model.model
|
| 200 |
sys_msg = SYSTEM_PREFIX
|
| 201 |
-
status = "
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
-
# Routing Logic
|
| 205 |
if intent == "coding" and getattr(coder_model, "model", None):
|
| 206 |
selected_model = coder_model.model
|
| 207 |
sys_msg += " You are an Expert Coder. Provide clean, working code."
|
|
@@ -209,47 +202,65 @@ async def chat_endpoint(request: ChatRequest):
|
|
| 209 |
|
| 210 |
elif intent == "reasoning" and getattr(chat_model, "model", None):
|
| 211 |
selected_model = chat_model.model
|
| 212 |
-
sys_msg += " Think step-by-step
|
| 213 |
status = "Reasoning..."
|
| 214 |
|
| 215 |
-
elif intent == "sad":
|
| 216 |
-
status = "Empathizing..."
|
| 217 |
-
|
| 218 |
elif intent == "time":
|
| 219 |
t = get_real_time()
|
| 220 |
-
|
|
|
|
| 221 |
status = "Checking Time..."
|
| 222 |
|
| 223 |
elif intent == "search":
|
| 224 |
status = "Searching Web..."
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
search_q = clean_query if len(clean_query) > 3 else last_msg
|
| 228 |
|
| 229 |
res = await perform_web_search(search_q)
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
-
#
|
|
|
|
|
|
|
| 240 |
if raw_msgs[0].get("role") != "system":
|
| 241 |
raw_msgs.insert(0, {"role":"system","content": sys_msg})
|
| 242 |
else:
|
| 243 |
raw_msgs[0]["content"] = sys_msg
|
| 244 |
-
|
| 245 |
-
if tool_context:
|
| 246 |
-
# Add tool context to the SYSTEM message for better adherence
|
| 247 |
-
raw_msgs[0]["content"] += tool_context
|
| 248 |
|
| 249 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
|
| 251 |
-
|
| 252 |
|
|
|
|
| 253 |
def iter_response():
|
| 254 |
try:
|
| 255 |
yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
|
|
|
|
| 1 |
+
# app.py β Fixed RAG Context Injection (Solves Data Ignoring)
|
| 2 |
import os
|
| 3 |
import json
|
| 4 |
import logging
|
|
|
|
| 11 |
from typing import Any, Dict, List
|
| 12 |
|
| 13 |
# Local model modules
|
|
|
|
| 14 |
import coder_model
|
| 15 |
import chat_model
|
| 16 |
|
| 17 |
# === SAFE IMPORT FOR NEW LIBRARIES ===
|
| 18 |
try:
|
| 19 |
from sentence_transformers import SentenceTransformer, util
|
| 20 |
+
from duckduckgo_search import DDGS # Sync DDGS is more reliable
|
| 21 |
NEURAL_AVAILABLE = True
|
| 22 |
except ImportError:
|
| 23 |
NEURAL_AVAILABLE = False
|
| 24 |
+
print("β οΈ WARNING: sentence-transformers or duckduckgo-search not found.")
|
| 25 |
|
| 26 |
logging.basicConfig(level=logging.INFO)
|
| 27 |
logger = logging.getLogger("nexari.app")
|
|
|
|
| 41 |
"coding": ["write python code", "fix bug", "create function", "script", "debug", "sql query", "html css"],
|
| 42 |
"reasoning": ["solve math", "explain logic", "why", "prove that", "analyze", "physics"],
|
| 43 |
"sad": ["i am sad", "depressed", "lonely", "feeling low", "heartbroken"],
|
| 44 |
+
"search": ["search google", "who is", "latest news", "price of", "weather", "find info", "current status", "gold price", "stock price"],
|
| 45 |
"time": ["what time is it", "current time", "date", "clock", "day is today"]
|
| 46 |
}
|
| 47 |
|
|
|
|
| 55 |
# === LOADERS ===
|
| 56 |
|
| 57 |
def load_neural_network():
|
|
|
|
| 58 |
global neural_classifier, encoded_anchors
|
| 59 |
+
if not NEURAL_AVAILABLE: return
|
|
|
|
| 60 |
|
| 61 |
try:
|
| 62 |
+
logger.info("β³ Loading Neural Model...")
|
|
|
|
| 63 |
model = SentenceTransformer(NEURAL_MODEL_NAME, cache_folder=NEURAL_DIR, device="cpu")
|
| 64 |
|
| 65 |
anchors = {}
|
|
|
|
| 77 |
|
| 78 |
# Strong references to the background loader tasks. The event loop itself only
# keeps weak references, so an otherwise unreferenced task may be collected
# before it finishes.
_STARTUP_TASKS = set()


def _report_loader_outcome(task):
    """Log a cancelled or failed background loader and release its reference."""
    _STARTUP_TASKS.discard(task)
    if task.cancelled():
        logger.warning("Background loader %s was cancelled", task.get_name())
        return
    # Calling exception() also marks the failure as retrieved.
    error = task.exception()
    if error is not None:
        logger.error(
            "Background loader %s failed", task.get_name(), exc_info=error
        )


@app.on_event("startup")
async def startup_event():
    """Prepare model directories and start the model loaders in the background.

    The loaders are scheduled as tasks and deliberately NOT awaited, so this
    handler returns as soon as they are scheduled rather than when they
    finish. A loader that raises is logged by ``_report_loader_outcome``
    instead of being dropped silently.
    """
    ensure_model_dir_or_fail()
    coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
    chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")

    loaders = (
        coder_model.load_model_async(),
        chat_model.load_model_async(),
        load_neural_async(),
    )
    for loader in loaders:
        task = asyncio.create_task(loader)
        _STARTUP_TASKS.add(task)
        task.add_done_callback(_report_loader_outcome)

    # Logged once the loaders are scheduled; they may still be running.
    logger.info("π Server Startup Complete")
|
| 91 |
|
| 92 |
class Message(BaseModel):
|
| 93 |
role: str
|
|
|
|
| 108 |
return str(datetime.now())
|
| 109 |
|
| 110 |
def search_sync(query: str):
    """Run a blocking DuckDuckGo text search and format the hits as plain text.

    A single attempt is made (there is no retry). Any exception raised while
    searching or formatting is logged and reported as ``None``.

    Args:
        query: Search string passed to ``DDGS.text``.

    Returns:
        A string with one ``Source / Snippet / Link`` paragraph per hit (at
        most four hits), each followed by a blank line, or ``None`` when the
        search yields nothing or fails.
    """
    logger.info("π Executing Search for: %s", query)
    try:
        with DDGS() as ddgs:
            # Four hits give the model a little more context than a single one.
            results = list(ddgs.text(query, max_results=4))
        if not results:
            return None

        # .get() keeps one hit with a missing field from raising KeyError and
        # thereby discarding every other usable result.
        return "".join(
            f"Source: {hit.get('title', '')}\n"
            f"Snippet: {hit.get('body', '')}\n"
            f"Link: {hit.get('href', '')}\n\n"
            for hit in results
        )
    except Exception as e:
        # Best-effort tool: callers treat None as "no results".
        logger.error("DDGS Error: %s", e)
        return None
|
| 127 |
|
| 128 |
async def perform_web_search(query: str):
    """Run ``search_sync`` in a worker thread so the event loop is not blocked.

    Returns ``None`` without searching when ``NEURAL_AVAILABLE`` is false;
    otherwise returns whatever ``search_sync(query)`` returns.
    """
    if NEURAL_AVAILABLE:
        return await asyncio.to_thread(search_sync, query)
    return None
|
| 131 |
|
| 132 |
# === INTENT LOGIC ===
|
| 133 |
|
| 134 |
def get_intent_neural(text: str):
    """Classify *text* into an intent label by embedding similarity.

    The text is encoded with the module-level ``neural_classifier`` and
    compared against each intent's pre-encoded anchors in ``encoded_anchors``.
    An intent's score is the highest cosine similarity among its anchors.

    Args:
        text: The user message to classify.

    Returns:
        The best-scoring intent name, or ``"chat"`` when the classifier is not
        loaded, the text has fewer than two whitespace-separated words, the
        best score is below 0.28, or classification raises.
    """
    if not neural_classifier:
        return "chat"
    try:
        # Single-word inputs are treated as plain chat without encoding.
        if len(text.split()) < 2:
            return "chat"

        user_embedding = neural_classifier.encode(text, convert_to_tensor=True)
        scores = {
            intent: float(util.cos_sim(user_embedding, anchor_embeddings).max())
            for intent, anchor_embeddings in encoded_anchors.items()
        }
        best = max(scores, key=scores.get)
        # Weak matches fall back to general chat.
        if scores[best] < 0.28:
            return "chat"
        return best
    except Exception:
        # Still best-effort, but record the failure instead of hiding it.
        logger.exception("Intent classification failed; falling back to 'chat'")
        return "chat"
|
| 148 |
|
| 149 |
def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
|
| 150 |
if isinstance(chunk, dict):
|
|
|
|
| 153 |
|
| 154 |
SYSTEM_PREFIX = (
|
| 155 |
"You are Nexari-G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
|
| 156 |
+
"Your goal is to provide accurate, helpful, and concise answers. "
|
| 157 |
+
"Always identify yourself as Nexari-G1. "
|
| 158 |
+
"Use emojis to make the conversation lively. "
|
|
|
|
|
|
|
| 159 |
)
|
| 160 |
|
| 161 |
def limit_context(messages: List[Dict]) -> List[Dict]:
|
|
|
|
| 174 |
raw_msgs = [m.dict() for m in request.messages] if request.messages else []
|
| 175 |
if not raw_msgs: return {"error": "Empty messages"}
|
| 176 |
|
| 177 |
+
# 1. Capture User's Last Message
|
| 178 |
+
last_user_msg_obj = raw_msgs[-1]
|
| 179 |
+
last_msg_text = last_user_msg_obj['content']
|
| 180 |
|
| 181 |
+
intent = get_intent_neural(last_msg_text) or "chat"
|
| 182 |
+
|
| 183 |
+
# Fallback keyword check
|
| 184 |
+
if intent == "chat" and len(last_msg_text) > 3:
|
| 185 |
+
lower = last_msg_text.lower()
|
| 186 |
if "time" in lower and ("what" in lower or "tell" in lower): intent = "time"
|
| 187 |
elif "search" in lower or "google" in lower or "price" in lower or "news" in lower: intent = "search"
|
| 188 |
|
| 189 |
selected_model = chat_model.model
|
| 190 |
sys_msg = SYSTEM_PREFIX
|
| 191 |
+
status = "Thinking..."
|
| 192 |
+
|
| 193 |
+
# Context Injection Variables
|
| 194 |
+
injected_context = ""
|
| 195 |
+
|
| 196 |
+
# === ROUTING & TOOL EXECUTION ===
|
| 197 |
|
|
|
|
| 198 |
if intent == "coding" and getattr(coder_model, "model", None):
|
| 199 |
selected_model = coder_model.model
|
| 200 |
sys_msg += " You are an Expert Coder. Provide clean, working code."
|
|
|
|
| 202 |
|
| 203 |
elif intent == "reasoning" and getattr(chat_model, "model", None):
|
| 204 |
selected_model = chat_model.model
|
| 205 |
+
sys_msg += " Think step-by-step."
|
| 206 |
status = "Reasoning..."
|
| 207 |
|
|
|
|
|
|
|
|
|
|
| 208 |
elif intent == "time":
|
| 209 |
t = get_real_time()
|
| 210 |
+
# Inject directly into user prompt for high attention
|
| 211 |
+
injected_context = f"CURRENT DATE & TIME: {t}"
|
| 212 |
status = "Checking Time..."
|
| 213 |
|
| 214 |
elif intent == "search":
|
| 215 |
status = "Searching Web..."
|
| 216 |
+
clean_query = last_msg_text.replace("search", "").replace("google", "").strip()
|
| 217 |
+
search_q = clean_query if len(clean_query) > 2 else last_msg_text
|
|
|
|
| 218 |
|
| 219 |
res = await perform_web_search(search_q)
|
| 220 |
+
|
| 221 |
+
if res:
|
| 222 |
+
# STRONG INJECTION PATTERN
|
| 223 |
+
injected_context = (
|
| 224 |
+
f"### SEARCH RESULTS (REAL-TIME DATA):\n{res}\n"
|
| 225 |
+
"### INSTRUCTION:\n"
|
| 226 |
+
"Answer the user's question using ONLY the Search Results above. "
|
| 227 |
+
"Do NOT use your internal training data if it conflicts. "
|
| 228 |
+
"If the price or data is in the results, state it clearly."
|
| 229 |
+
)
|
| 230 |
+
else:
|
| 231 |
+
injected_context = (
|
| 232 |
+
"### SYSTEM NOTE:\n"
|
| 233 |
+
"Attempted to search the web but found no results. "
|
| 234 |
+
"Please politely inform the user that you couldn't find current info."
|
| 235 |
+
)
|
| 236 |
|
| 237 |
+
# === MESSAGE CONSTRUCTION ===
|
| 238 |
+
|
| 239 |
+
# 1. Set System Prompt
|
| 240 |
if raw_msgs[0].get("role") != "system":
|
| 241 |
raw_msgs.insert(0, {"role":"system","content": sys_msg})
|
| 242 |
else:
|
| 243 |
raw_msgs[0]["content"] = sys_msg
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
+
# 2. INJECT CONTEXT INTO LAST USER MESSAGE (Crucial Fix)
|
| 246 |
+
# This forces the model to see the context immediately before generating the answer.
|
| 247 |
+
if injected_context:
|
| 248 |
+
new_content = (
|
| 249 |
+
f"{injected_context}\n\n"
|
| 250 |
+
f"### USER QUESTION:\n{last_msg_text}"
|
| 251 |
+
)
|
| 252 |
+
# Update the last message in the list
|
| 253 |
+
raw_msgs[-1]['content'] = new_content
|
| 254 |
+
|
| 255 |
+
# 3. Model Warmup Check
|
| 256 |
+
if not selected_model:
|
| 257 |
+
if chat_model.model: selected_model = chat_model.model
|
| 258 |
+
elif coder_model.model: selected_model = coder_model.model
|
| 259 |
+
else: return {"error": "System warming up..."}
|
| 260 |
|
| 261 |
+
final_msgs = limit_context(raw_msgs)
|
| 262 |
|
| 263 |
+
# Streamer
|
| 264 |
def iter_response():
|
| 265 |
try:
|
| 266 |
yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
|