# app.py - Nexari G1 (Advanced Intent Analysis & Confidence Gating)
import os
import json
import logging
import asyncio
from datetime import datetime
import pytz 
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from typing import Any, Dict, List

# Local model modules
import coder_model
import chat_model

# === SAFE IMPORT FOR NEW LIBRARIES ===
try:
    from sentence_transformers import SentenceTransformer, util
    from duckduckgo_search import DDGS
    NEURAL_AVAILABLE = True
except ImportError:
    NEURAL_AVAILABLE = False
    print("โš ๏ธ WARNING: sentence-transformers or duckduckgo-search not found.")

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("nexari.app")

app = FastAPI()

MODEL_DIR = "./models"
NEURAL_DIR = os.path.join(MODEL_DIR, "neural")

# === CONFIGURATION ===
NEURAL_MODEL_NAME = "all-MiniLM-L6-v2"
neural_classifier = None
encoded_anchors = {}
MAX_HISTORY_MESSAGES = 6 

# Optimized Anchors for better Vector Separation
INTENT_ANCHORS = {
    "coding": ["write python code", "fix bug", "create function", "script", "debug", "sql query", "html css", "java code"],
    "reasoning": ["solve math", "explain logic", "why", "prove that", "analyze", "physics", "chemistry"],
    "search": ["latest news", "price of gold", "weather today", "who is the ceo", "current stock price", "search google", "find info"],
    "time": ["what time is it", "current time", "date today", "clock", "day is today"],
    # New category to pull "Identity" questions away from Search
    "identity": ["what is your name", "who are you", "who created you", "tell me about yourself", "are you ai"]
}
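
# Illustrative note: each anchor group above is encoded once at startup, and an
# incoming message is routed to the intent whose group contains the single most
# similar phrase (max cosine similarity; see analyze_deep_intent below). For
# example, "can you fix this bug in my script" should land closest to "coding",
# while "who created you" is pulled toward "identity" rather than "search".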

def ensure_model_dir_or_fail():
    try:
        os.makedirs(MODEL_DIR, exist_ok=True)
        os.makedirs(NEURAL_DIR, exist_ok=True)
    except Exception as e:
        logger.critical("Unable to create model dir: %s", e)
        raise

# === LOADERS ===

def load_neural_network():
    global neural_classifier, encoded_anchors
    if not NEURAL_AVAILABLE: return

    try:
        logger.info("โณ Loading Neural Intent Model...")
        model = SentenceTransformer(NEURAL_MODEL_NAME, cache_folder=NEURAL_DIR, device="cpu")
        
        anchors = {}
        for intent, texts in INTENT_ANCHORS.items():
            anchors[intent] = model.encode(texts, convert_to_tensor=True)
        
        neural_classifier = model
        encoded_anchors = anchors
        logger.info("โœ… Neural Intent Classifier Ready!")
    except Exception as e:
        logger.error(f"โŒ Failed to load Neural Network: {e}")

async def load_neural_async():
    await asyncio.to_thread(load_neural_network)

@app.on_event("startup")
async def startup_event():
    ensure_model_dir_or_fail()
    coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
    chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")

    # Kick off model loading in the background; requests that arrive before the
    # models are ready hit the "System warming up..." fallback in the endpoint.
    # Keep task references on app.state so they are not garbage-collected mid-load.
    app.state.load_tasks = [
        asyncio.create_task(coder_model.load_model_async()),
        asyncio.create_task(chat_model.load_model_async()),
        asyncio.create_task(load_neural_async()),
    ]
    logger.info("🚀 Server Startup Complete")

class Message(BaseModel):
    role: str
    content: str

class ChatRequest(BaseModel):
    messages: list[Message]
    stream: bool = True
    temperature: float = 0.7
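
# Example request body for /v1/chat/completions (a minimal sketch; the schema is
# defined by ChatRequest above, and the field values shown are only illustrative):
#
#   {
#       "messages": [{"role": "user", "content": "what time is it?"}],
#       "stream": true,
#       "temperature": 0.7
#   }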

# === TOOLS ===

def get_real_time():
    try:
        ist = pytz.timezone('Asia/Kolkata')
        return datetime.now(ist).strftime("%A, %d %B %Y, %I:%M %p (IST)")
    except Exception:
        return str(datetime.now())

def search_sync(query: str):
    logger.info(f"๐Ÿ”Ž Executing Search for: {query}")
    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=4))
            if not results: return None
            formatted_res = ""
            for r in results:
                formatted_res += f"Source: {r['title']}\nSnippet: {r['body']}\nLink: {r['href']}\n\n"
            return formatted_res
    except Exception as e:
        logger.error(f"DDGS Error: {e}")
        return None

async def perform_web_search(query: str):
    if not NEURAL_AVAILABLE: return None
    return await asyncio.to_thread(search_sync, query)

# === ADVANCED INTENT LOGIC (2025 Technique) ===

def analyze_deep_intent(text: str):
    """
    Combines Neural Similarity with Confidence Gating & Token Chain Analysis.
    Returns: (intent_name, confidence_score)
    """
    # 1. Low-Level Token Analysis (The Chain Reaction Check)
    text_lower = text.lower()
    tokens = text_lower.split()
    
    # GUARDRAIL: Self-Reference Override
    # If user asks about "your name", "you", "yourself" -> Force Chat/Identity
    self_tokens = {"your", "you", "yourself", "created", "made"}
    if "name" in tokens and len(tokens) < 7 and any(t in tokens for t in self_tokens):
        return "identity", 0.99
    
    if not neural_classifier: return "chat", 0.0

    try:
        # 2. Neural Vector Search
        user_embedding = neural_classifier.encode(text, convert_to_tensor=True)
        scores = {}
        for intent, anchor_embeddings in encoded_anchors.items():
            cosine_scores = util.cos_sim(user_embedding, anchor_embeddings)
            # We take the MAX score from the anchor group
            scores[intent] = float(cosine_scores.max())
        
        best_intent = max(scores, key=scores.get)
        best_score = scores[best_intent]
        
        # 3. Confidence Gating
        # Search needs HIGH confidence to trigger (prevent false positives)
        if best_intent == "search":
            if best_score < 0.45: # Strict threshold for search
                logger.info(f"โš ๏ธ Search detected but low confidence ({best_score:.2f}). Fallback to chat.")
                return "chat", best_score
        
        # Identity maps to chat logic internally
        if best_intent == "identity":
            return "chat", best_score
            
        # General Threshold
        if best_score < 0.30: 
            return "chat", best_score
            
        return best_intent, best_score

    except Exception as e:
        logger.error(f"Intent Error: {e}")
        return "chat", 0.0

def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
    if isinstance(chunk, dict):
        return {k: v for k, v in chunk.items() if k != "status"}
    return {"text": str(chunk)}

SYSTEM_PREFIX = (
    "You are Nexari G1, an advanced AI created by Piyush, the CEO of Nexari AI. "
    "Your core directive is to be helpful, and accurate. "
    "Always respond naturally, clarity, and Always respond with a friendly, positive tone, regardless of context. "
    "You have internet access, web search ability, and server tools. Never deny these capabilities. "
    
    # --- NEW FOCUS INSTRUCTION START ---
    "ATTENTION PROTOCOL: "
    "1. Always prioritize the userโ€™s latest message above everything else. "
    "2. Use previous messages only for context (names, continuity, or details). "
    "3. If the user changes the topic, immediately switch and ignore the old topic. "
    "4. Do not repeat previous answers unless the user clearly asks for repetition. "
    # --- NEW FOCUS INSTRUCTION END ---

    "Use emojis to make the conversation lively. "
)

def limit_context(messages: List[Dict]) -> List[Dict]:
    if not messages: return []
    sys = messages[0] if messages[0].get("role") == "system" else None
    start = 1 if sys else 0
    rem = messages[start:]
    if len(rem) > MAX_HISTORY_MESSAGES: rem = rem[-MAX_HISTORY_MESSAGES:]
    final = []
    if sys: final.append(sys)
    final.extend(rem)
    return final
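
# Illustrative trimming (with MAX_HISTORY_MESSAGES = 6): a history of
# [system, u1, a1, u2, a2, u3, a3, u4] is reduced to
# [system, a1, u2, a2, u3, a3, u4]: the system prompt is always preserved and
# only the six most recent non-system messages are kept.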

@app.post("/v1/chat/completions")
async def chat_endpoint(request: ChatRequest):
    raw_msgs = [m.dict() for m in request.messages] if request.messages else []
    if not raw_msgs: return {"error": "Empty messages"}
    
    last_msg_text = raw_msgs[-1]['content']
    
    # === ANALYZE INTENT ===
    intent, confidence = analyze_deep_intent(last_msg_text)
    
    logger.info(f"๐Ÿง  Analysis: Text='{last_msg_text}' | Intent='{intent}' | Conf={confidence:.2f}")

    selected_model = getattr(chat_model, "model", None)
    sys_msg = SYSTEM_PREFIX
    status = "Thinking..."
    injected_context = ""
    
    # === ROUTING ===
    
    if intent == "coding" and getattr(coder_model, "model", None):
        selected_model = coder_model.model
        sys_msg += " You are an Expert Coder. Provide clean, working code."
        status = "Coding..."
    
    elif intent == "reasoning" and getattr(chat_model, "model", None):
        selected_model = chat_model.model
        sys_msg += " Think step-by-step."
        status = "Reasoning..."
        
    elif intent == "time":
        t = get_real_time()
        injected_context = f"CURRENT DATE & TIME: {t}"
        status = "Checking Time..."
        
    elif intent == "search":
        status = "Searching Web..."
        clean_query = last_msg_text.replace("search", "").replace("google", "").strip()
        search_q = clean_query if len(clean_query) > 2 else last_msg_text
        
        res = await perform_web_search(search_q)
        
        if res:
            injected_context = (
                f"### SEARCH RESULTS (REAL-TIME DATA):\n{res}\n"
                "### INSTRUCTION:\n"
                "Answer the user's question using ONLY the Search Results above."
            )
        else:
            # Silent fallback if search fails
            injected_context = ""
            status = "Thinking..."

    # === CONSTRUCT MESSAGE ===
    if raw_msgs[0].get("role") != "system":
        raw_msgs.insert(0, {"role":"system","content": sys_msg})
    else:
        raw_msgs[0]["content"] = sys_msg

    if injected_context:
        new_content = (
            f"{injected_context}\n\n"
            f"### USER QUESTION:\n{last_msg_text}"
        )
        raw_msgs[-1]['content'] = new_content

    if not selected_model:
        if getattr(chat_model, "model", None): selected_model = chat_model.model
        elif getattr(coder_model, "model", None): selected_model = coder_model.model
        else: return {"error": "System warming up..."}

    final_msgs = limit_context(raw_msgs)

    def iter_response():
        try:
            yield f"event: status\ndata: {json.dumps({'status': status})}\n\n"
            yield ":\n\n"
            stream = selected_model.create_chat_completion(
                messages=final_msgs, temperature=request.temperature, stream=True
            )
            for chunk in stream:
                yield f"data: {json.dumps(sanitize_chunk(chunk))}\n\n"
            yield "data: [DONE]\n\n"
        except Exception as e:
            logger.error(f"Stream error: {e}")
            yield f"data: {json.dumps({'error': str(e)})}\n\n"

    return StreamingResponse(iter_response(), media_type="text/event-stream")
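
# Minimal client sketch for the SSE stream above (assumes the server is reachable
# at http://localhost:8000 and that the `requests` package is installed; host,
# port, and message content are placeholders):
#
#   import json, requests
#   resp = requests.post(
#       "http://localhost:8000/v1/chat/completions",
#       json={"messages": [{"role": "user", "content": "latest news about AI"}]},
#       stream=True,
#   )
#   for line in resp.iter_lines():
#       if line.startswith(b"data: ") and line != b"data: [DONE]":
#           print(json.loads(line[len(b"data: "):]))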