Spaces:
Running
Running
File size: 10,766 Bytes
941017d cf19141 a96cb5b 941017d ded04d8 941017d ded04d8 941017d ded04d8 941017d 546169c 9ec0e83 3e7580e 941017d 3e7580e 941017d 546169c ded04d8 1c76a46 ded04d8 d16dd06 ded04d8 f0e7a67 ded04d8 941017d af8ad70 941017d af8ad70 941017d af8ad70 941017d af8ad70 941017d cf19141 941017d cf19141 941017d 3e7580e 941017d 8bdad9a 941017d ded04d8 941017d 8bdad9a 941017d 07654fd ded04d8 941017d 8bdad9a a3fb9fb 4920de9 941017d b1432b7 941017d 8bdad9a 941017d 8bdad9a 941017d 8bdad9a 941017d 8bdad9a 941017d cf19141 941017d cf19141 941017d b1432b7 941017d 8bdad9a 941017d 8bdad9a 941017d 0c5b849 941017d 0c5b849 941017d 0c5b849 ded04d8 cf19141 941017d b1432b7 cf19141 ec58582 cf19141 941017d cf19141 546169c 941017d 8bdad9a 941017d a96cb5b 941017d cf19141 941017d 8bdad9a cf19141 941017d 8bdad9a 941017d cf19141 941017d cf19141 941017d cf19141 941017d cf19141 941017d cf19141 941017d cf19141 941017d cf19141 941017d cf19141 941017d cf19141 941017d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 |
# app.py โ Nexari G1 (Advanced Intent Analysis & Confidence Gating)
import os
import json
import logging
import asyncio
from datetime import datetime
import pytz
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from typing import Any, Dict, List
# Local model modules
import coder_model
import chat_model
# === SAFE IMPORT FOR NEW LIBRARIES ===
# sentence-transformers powers neural intent routing; duckduckgo-search
# powers the web-search tool. Both are optional: NEURAL_AVAILABLE gates
# every code path that touches them, so the server still boots without them.
try:
    from sentence_transformers import SentenceTransformer, util
    from duckduckgo_search import DDGS
    NEURAL_AVAILABLE = True
except ImportError:
    NEURAL_AVAILABLE = False
    print("โ ๏ธ WARNING: sentence-transformers or duckduckgo-search not found.")

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("nexari.app")
app = FastAPI()

# Model weights live under ./models; the sentence-transformer cache gets
# its own subdirectory.
MODEL_DIR = "./models"
NEURAL_DIR = os.path.join(MODEL_DIR, "neural")

# === CONFIGURATION ===
NEURAL_MODEL_NAME = "all-MiniLM-L6-v2"
# Populated by load_neural_network(); stay None / {} until loading succeeds,
# in which case analyze_deep_intent() falls back to "chat".
neural_classifier = None
encoded_anchors = {}
# Maximum number of non-system messages kept when trimming chat history.
MAX_HISTORY_MESSAGES = 6

# Optimized Anchors for better Vector Separation.
# Each intent maps to example phrases; they are encoded once at startup and
# user text is routed to the intent whose closest anchor scores highest.
INTENT_ANCHORS = {
    "coding": ["write python code", "fix bug", "create function", "script", "debug", "sql query", "html css", "java code"],
    "reasoning": ["solve math", "explain logic", "why", "prove that", "analyze", "physics", "chemistry"],
    "search": ["latest news", "price of gold", "weather today", "who is the ceo", "current stock price", "search google", "find info"],
    "time": ["what time is it", "current time", "date today", "clock", "day is today"],
    # New category to pull "Identity" questions away from Search
    "identity": ["what is your name", "who are you", "who created you", "tell me about yourself", "are you ai"]
}
def ensure_model_dir_or_fail():
    """Create the model directories, failing fast when creation is impossible.

    Raises:
        OSError: when either directory cannot be created.

    BUG FIX: the previous version logged the error and silently continued,
    so a failed mkdir only surfaced later as confusing model-load errors —
    despite the function name promising fail-fast behavior. We now re-raise
    after logging.
    """
    try:
        os.makedirs(MODEL_DIR, exist_ok=True)
        os.makedirs(NEURAL_DIR, exist_ok=True)
    except Exception as e:
        logger.critical("Unable to create model dir: %s", e)
        raise
# === LOADERS ===
def load_neural_network():
    """Load the sentence-transformer model and pre-encode the intent anchors.

    Populates the module globals ``neural_classifier`` and
    ``encoded_anchors``. On any failure both keep their defaults, and
    intent analysis degrades gracefully to "chat".
    """
    global neural_classifier, encoded_anchors
    if not NEURAL_AVAILABLE:
        return
    try:
        logger.info("โณ Loading Neural Intent Model...")
        model = SentenceTransformer(NEURAL_MODEL_NAME, cache_folder=NEURAL_DIR, device="cpu")
        # Pre-compute anchor embeddings once so each request only has to
        # encode the user's text.
        anchors = {
            intent: model.encode(texts, convert_to_tensor=True)
            for intent, texts in INTENT_ANCHORS.items()
        }
        # Publish the globals only after everything succeeded, so a partial
        # load never leaves the classifier half-initialized.
        neural_classifier = model
        encoded_anchors = anchors
        # BUG FIX: this success message was a string literal split across two
        # physical lines (a SyntaxError); merged back onto one line.
        logger.info("โ… Neural Intent Classifier Ready!")
    except Exception as e:
        logger.error(f"โ Failed to load Neural Network: {e}")
async def load_neural_async():
    """Run the blocking neural-model load in a worker thread so the event
    loop stays responsive during startup."""
    await asyncio.to_thread(load_neural_network)
@app.on_event("startup")
async def startup_event():
    """Prepare directories and kick off model loading in the background.

    Loading is deliberately non-blocking: the chat endpoint already answers
    "System warming up..." while models are absent, so startup returns
    immediately and the server accepts requests right away.
    """
    ensure_model_dir_or_fail()
    coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
    chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
    # BUG FIX: the old code called asyncio.gather(*tasks, ...) without
    # awaiting it — a no-op — and kept no reference to the tasks. asyncio
    # holds only weak references to tasks, so an unreferenced loader task
    # could be garbage-collected mid-load. Keeping them on app.state keeps
    # them alive for the application's lifetime.
    app.state.load_tasks = [
        asyncio.create_task(coder_model.load_model_async()),
        asyncio.create_task(chat_model.load_model_async()),
        asyncio.create_task(load_neural_async()),
    ]
    logger.info("๐ Server Startup Complete")
class Message(BaseModel):
    # A single chat turn in the OpenAI message format.
    role: str     # e.g. "system" | "user" | "assistant" (not validated here)
    content: str  # message text
class ChatRequest(BaseModel):
    # OpenAI-style chat completion request body.
    messages: list[Message]  # full conversation history, oldest first
    stream: bool = True      # NOTE(review): never read in this file — the endpoint always streams
    temperature: float = 0.7 # sampling temperature forwarded to the model
# === TOOLS ===
def get_real_time():
    """Return the current time in India (IST) as a human-readable string.

    Returns:
        e.g. "Monday, 05 May 2025, 03:12 PM (IST)"; falls back to
        ``str(datetime.now())`` if the timezone database is unavailable.
    """
    try:
        # stdlib zoneinfo (3.9+) replaces the third-party pytz dependency;
        # the file already requires 3.9+ (``list[Message]`` annotation).
        from zoneinfo import ZoneInfo
        ist = ZoneInfo('Asia/Kolkata')
        return datetime.now(ist).strftime("%A, %d %B %Y, %I:%M %p (IST)")
    except Exception:
        return str(datetime.now())
def search_sync(query: str):
    """Blocking DuckDuckGo text search, formatted for prompt injection.

    Returns a "Source/Snippet/Link" block per hit (max 4), or None when
    there are no results or the search raises.
    """
    logger.info(f"๐ Executing Search for: {query}")
    try:
        with DDGS() as ddgs:
            hits = list(ddgs.text(query, max_results=4))
        if not hits:
            return None
        return "".join(
            f"Source: {hit['title']}\nSnippet: {hit['body']}\nLink: {hit['href']}\n\n"
            for hit in hits
        )
    except Exception as e:
        logger.error(f"DDGS Error: {e}")
        return None
async def perform_web_search(query: str):
    """Run the blocking DuckDuckGo search off the event loop.

    Returns None immediately when the search library was never imported.
    """
    if not NEURAL_AVAILABLE:
        return None
    result = await asyncio.to_thread(search_sync, query)
    return result
# === ADVANCED INTENT LOGIC (2025 Technique) ===
def analyze_deep_intent(text: str):
"""
Combines Neural Similarity with Confidence Gating & Token Chain Analysis.
Returns: (intent_name, confidence_score)
"""
# 1. Low-Level Token Analysis (The Chain Reaction Check)
text_lower = text.lower()
tokens = text_lower.split()
# GUARDRAIL: Self-Reference Override
# If user asks about "your name", "you", "yourself" -> Force Chat/Identity
self_tokens = {"your", "you", "yourself", "created", "made"}
if "name" in tokens and len(tokens) < 7 and any(t in tokens for t in self_tokens):
return "identity", 0.99
if not neural_classifier: return "chat", 0.0
try:
# 2. Neural Vector Search
user_embedding = neural_classifier.encode(text, convert_to_tensor=True)
scores = {}
for intent, anchor_embeddings in encoded_anchors.items():
cosine_scores = util.cos_sim(user_embedding, anchor_embeddings)
# We take the MAX score from the anchor group
scores[intent] = float(cosine_scores.max())
best_intent = max(scores, key=scores.get)
best_score = scores[best_intent]
# 3. Confidence Gating
# Search needs HIGH confidence to trigger (prevent false positives)
if best_intent == "search":
if best_score < 0.45: # Strict threshold for search
logger.info(f"โ ๏ธ Search detected but low confidence ({best_score:.2f}). Fallback to chat.")
return "chat", best_score
# Identity maps to chat logic internally
if best_intent == "identity":
return "chat", best_score
# General Threshold
if best_score < 0.30:
return "chat", best_score
return best_intent, best_score
except Exception as e:
logger.error(f"Intent Error: {e}")
return "chat", 0.0
def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
    """Normalize a streamed model chunk to a JSON-safe dict.

    Dicts are copied with any internal "status" key removed; anything
    else is wrapped as {"text": str(chunk)}.
    """
    if not isinstance(chunk, dict):
        return {"text": str(chunk)}
    cleaned = dict(chunk)
    cleaned.pop("status", None)
    return cleaned
# Base system prompt shared by every route; the endpoint appends
# role-specific instructions (coder / reasoning) before injecting it as
# message 0. This is runtime prompt text — edits here change model behavior.
SYSTEM_PREFIX = (
    "You are Nexari G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
    "Your core directive is to be helpful, and accurate. "
    "Always respond naturally, clarity, and Always respond with a friendly, positive tone, regardless of context. "
    "You have internet access, web search ability, and server tools. Never deny these capabilities. "
    # --- NEW FOCUS INSTRUCTION START ---
    "ATTENTION PROTOCOL: "
    "1. Always prioritize the userโs latest message above everything else. "
    "2. Use previous messages only for context (names, continuity, or details). "
    "3. If the user changes the topic, immediately switch and ignore the old topic. "
    "4. Do not repeat previous answers unless the user clearly asks for repetition. "
    # --- NEW FOCUS INSTRUCTION END ---
    "Use emojis to make the conversation lively. "
)
def limit_context(messages: List[Dict], limit=None) -> List[Dict]:
    """Trim history to the most recent messages, pinning any leading
    system message at the front.

    Args:
        messages: chat messages, possibly starting with a system message.
        limit: maximum number of non-system messages to keep; defaults to
            the module-level MAX_HISTORY_MESSAGES (generalized from the
            previously hard-coded constant; default behavior unchanged).

    Returns:
        New list: optional system message followed by the last *limit*
        remaining messages.
    """
    if not messages:
        return []
    if limit is None:
        limit = MAX_HISTORY_MESSAGES
    sys = messages[0] if messages[0].get("role") == "system" else None
    start = 1 if sys else 0
    rem = messages[start:]
    if len(rem) > limit:
        rem = rem[-limit:]
    final = []
    if sys:
        final.append(sys)
    final.extend(rem)
    return final
@app.post("/v1/chat/completions")
async def chat_endpoint(request: ChatRequest):
    """OpenAI-style chat completions endpoint.

    Pipeline: classify the latest user message's intent, pick a model and
    system prompt, optionally inject real-time context (clock / web search)
    into the final user turn, trim history, then stream the completion back
    as Server-Sent Events.
    """
    # NOTE(review): .dict() is the pydantic v1 spelling; if the project runs
    # pydantic v2 this emits deprecation warnings (v2 name: .model_dump()).
    raw_msgs = [m.dict() for m in request.messages] if request.messages else []
    if not raw_msgs: return {"error": "Empty messages"}
    # Only the newest message drives routing; older turns are context only.
    last_msg_text = raw_msgs[-1]['content']
    # === ANALYZE INTENT ===
    intent, confidence = analyze_deep_intent(last_msg_text)
    logger.info(f"๐ง Analysis: Text='{last_msg_text}' | Intent='{intent}' | Conf={confidence:.2f}")
    # Defaults: general chat model, base persona prompt, no tool context.
    selected_model = chat_model.model
    sys_msg = SYSTEM_PREFIX
    status = "Thinking..."
    injected_context = ""
    # === ROUTING ===
    # Each branch may swap the model, extend the system prompt, and/or
    # prepare context to prepend to the user's final message.
    if intent == "coding" and getattr(coder_model, "model", None):
        selected_model = coder_model.model
        sys_msg += " You are an Expert Coder. Provide clean, working code."
        status = "Coding..."
    elif intent == "reasoning" and getattr(chat_model, "model", None):
        selected_model = chat_model.model
        sys_msg += " Think step-by-step."
        status = "Reasoning..."
    elif intent == "time":
        t = get_real_time()
        injected_context = f"CURRENT DATE & TIME: {t}"
        status = "Checking Time..."
    elif intent == "search":
        status = "Searching Web..."
        # Strip routing keywords so the engine sees only the real query;
        # fall back to the raw text if stripping leaves almost nothing.
        clean_query = last_msg_text.replace("search", "").replace("google", "").strip()
        search_q = clean_query if len(clean_query) > 2 else last_msg_text
        res = await perform_web_search(search_q)
        if res:
            injected_context = (
                f"### SEARCH RESULTS (REAL-TIME DATA):\n{res}\n"
                "### INSTRUCTION:\n"
                "Answer the user's question using ONLY the Search Results above."
            )
        else:
            # Silent fallback if search fails
            injected_context = ""
            status = "Thinking..."
    # === CONSTRUCT MESSAGE ===
    # Install (or overwrite) the system prompt as message 0.
    # NOTE(review): a caller-supplied system message is overwritten here —
    # confirm that discarding client system prompts is intended.
    if raw_msgs[0].get("role") != "system":
        raw_msgs.insert(0, {"role":"system","content": sys_msg})
    else:
        raw_msgs[0]["content"] = sys_msg
    if injected_context:
        # Prepend tool output to the final user turn so the model answers
        # from the injected data instead of its weights.
        new_content = (
            f"{injected_context}\n\n"
            f"### USER QUESTION:\n{last_msg_text}"
        )
        raw_msgs[-1]['content'] = new_content
    # Fallback chain for when the routed model has not finished loading yet.
    if not selected_model:
        if chat_model.model: selected_model = chat_model.model
        elif coder_model.model: selected_model = coder_model.model
        else: return {"error": "System warming up..."}
    final_msgs = limit_context(raw_msgs)
    def iter_response():
        # NOTE(review): a sync generator inside an async endpoint — the
        # framework iterates it outside the event loop; confirm this does
        # not block other requests under load.
        try:
            # Custom SSE "status" event lets the UI show e.g. "Searching...".
            yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
            yield ":\n\n"
            stream = selected_model.create_chat_completion(
                messages=final_msgs, temperature=request.temperature, stream=True
            )
            for chunk in stream:
                yield f"data: {json.dumps(sanitize_chunk(chunk))}\n\n"
            # OpenAI-compatible stream terminator.
            yield "data: [DONE]\n\n"
        except Exception as e:
            logger.error(f"Stream error: {e}")
            yield f"data: {json.dumps({'error': str(e)})}\n\n"
    return StreamingResponse(iter_response(), media_type="text/event-stream")
|