Nexari-Research committed on
Commit a3fb9fb · verified · 1 Parent(s): 9d13a39

Update app.py

Files changed (1):
  app.py +122 -49
app.py CHANGED
@@ -1,4 +1,4 @@
-# app.py -- FINAL: Forced Thought Structure & Robust Streaming
+# app.py -- Final: Forced Thinking/Planning logic via System Prompt
 import os
 import json
 import logging
@@ -30,8 +30,23 @@ def ensure_model_dir_or_fail():
 
 @app.on_event("startup")
 async def startup_event():
+    logger.info("Startup: ensure model dir and set base dirs...")
     ensure_model_dir_or_fail()
-    # Initialize paths logic here...
+
+    router_model.BASE_DIR = os.path.join(MODEL_DIR, "router")
+    coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
+    chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
+
+    # Load models asynchronously
+    tasks = [
+        asyncio.create_task(router_model.load_model_async()),
+        asyncio.create_task(coder_model.load_model_async()),
+        asyncio.create_task(chat_model.load_model_async()),
+    ]
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+    for i, r in enumerate(results):
+        if isinstance(r, Exception):
+            logger.error("Model loader %d failed: %s", i, r)
     logger.info("Startup complete.")
 
 class Message(BaseModel):
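Note on the startup hunk above: asyncio.gather(..., return_exceptions=True) means one failed loader no longer aborts the others; the server still boots, and the per-intent guards in the endpoint handle a missing model. A minimal sketch of the pattern, using hypothetical stand-in coroutines for the real load_model_async loaders (those live in the model wrapper modules, which this diff does not show):

import asyncio
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("startup-sketch")

# Hypothetical stand-ins for router_model/coder_model/chat_model.load_model_async().
async def load_ok(name: str) -> str:
    await asyncio.sleep(0.1)  # simulate model file I/O
    return f"{name} loaded"

async def load_bad(name: str) -> str:
    raise RuntimeError(f"{name}: model file not found")

async def startup() -> None:
    tasks = [
        asyncio.create_task(load_ok("router")),
        asyncio.create_task(load_bad("coder")),
        asyncio.create_task(load_ok("chat")),
    ]
    # return_exceptions=True turns failures into returned values, so one
    # bad loader cannot cancel the other two -- same pattern as the hunk above.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for i, r in enumerate(results):
        if isinstance(r, Exception):
            logger.error("Model loader %d failed: %s", i, r)

asyncio.run(startup())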
@@ -41,16 +56,41 @@ class Message(BaseModel):
 class ChatRequest(BaseModel):
     messages: list[Message]
     stream: bool = True
-    temperature: float = 0.6  # Slightly lower temp for better instruction following
+    temperature: float = 0.7
 
 def get_intent(last_user_message: str):
-    # Basic intent detection
-    text = (last_user_message or "").lower()
-    if any(tok in text for tok in ["code", "python", "script", "function", "html", "css", "fix"]):
-        return "coding"
-    if any(tok in text for tok in ["why", "how", "reason", "analyze", "think"]):
-        return "reasoning"
-    return "chat"
+    # Simple keyword-based routing fallback
+    if not getattr(router_model, "model", None):
+        text = (last_user_message or "").lower()
+        if any(tok in text for tok in ["code", "bug", "fix", "error", "function", "python", "js", "html", "css"]):
+            return "coding", "neutral"
+        if any(tok in text for tok in ["why", "how", "prove", "reason", "think", "analyze"]):
+            return "reasoning", "neutral"
+        return "chat", "neutral"
+
+    # Router model logic (if available)
+    sys_prompt = "Analyze intent. Return JSON like {'intent':'coding'|'chat'|'reasoning', 'sentiment':'neutral'|'sad'}"
+    try:
+        res = router_model.model.create_chat_completion(
+            messages=[{"role": "system", "content": sys_prompt}, {"role": "user", "content": last_user_message}],
+            temperature=0.1, max_tokens=50
+        )
+        content = ""
+        try:
+            content = res['choices'][0]['message']['content'].lower()
+        except Exception:
+            try:
+                content = res['choices'][0]['text'].lower()
+            except Exception:
+                content = ""
+
+        if "coding" in content: return "coding", "neutral"
+        if "reasoning" in content: return "reasoning", "neutral"
+        if "sad" in content: return "chat", "sad"
+        return "chat", "neutral"
+    except Exception as e:
+        logger.exception("Router failure: %s", e)
+        return "chat", "neutral"
 
 def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
     if isinstance(chunk, dict):
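Note on get_intent above: the router is prompted for JSON, but its reply is only substring-matched, which tolerates sloppy model output at the cost of precision. If stricter parsing is ever wanted, a sketch of a best-effort parser (a hypothetical helper, not part of this commit):

import json

def parse_router_reply(content: str) -> tuple[str, str]:
    """Best-effort parse of the router model's reply.

    Tries strict JSON first, then falls back to the substring
    matching the endpoint already uses. Purely illustrative.
    """
    intent, sentiment = "chat", "neutral"
    try:
        data = json.loads(content)
        intent = data.get("intent", intent)
        sentiment = data.get("sentiment", sentiment)
    except (json.JSONDecodeError, AttributeError):
        lowered = content.lower()
        if "coding" in lowered:
            intent = "coding"
        elif "reasoning" in lowered:
            intent = "reasoning"
        if "sad" in lowered:
            sentiment = "sad"
    if intent not in {"coding", "reasoning", "chat"}:
        intent = "chat"  # never let the router pick an unknown route
    return intent, sentiment

print(parse_router_reply('{"intent": "coding", "sentiment": "neutral"}'))
print(parse_router_reply("I think this is reasoning, user seems sad"))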
@@ -61,74 +101,107 @@ def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
                 out[k] = v
             else:
                 try:
-                    out[k] = json.dumps(v)
-                except:
+                    json.dumps(v)
+                    out[k] = v
+                except Exception:
                     out[k] = str(v)
         return out
-    return {"text": str(chunk)}
-
-# === SYSTEM PROMPTS WITH STRICT PLANNING ===
-SYSTEM_IDENTITY = "You are Nexari-G1, created by Piyush."
-
-# This instruction makes the model always think first
-STRICT_THOUGHT_PROMPT = (
-    "\n[IMPORTANT: REASONING REQUIRED]\n"
-    "1. You MUST start your response with a thinking block using <think>...</think> tags.\n"
-    "2. Inside <think>, plan your answer, analyze the user's request, and check for errors.\n"
-    "3. After </think>, provide the final polite response to the user.\n"
-    "4. Do NOT output the thought block if the user asks for a simple greeting, but still use empty tags <think></think> to maintain format."
+    else:
+        try:
+            txt = str(chunk)
+            return {"text": txt}
+        except Exception:
+            return {"text": ""}
+
+# === CORE IDENTITY & THOUGHT INSTRUCTIONS ===
+SYSTEM_IDENTITY_PREFIX = (
+    "You are Nexari-G1, created by Piyush. "
+    "Your name is Nexari-G1. "
+)
+
+# === MAGIC SAUCE: This forces the model to generate the <think> tags ===
+THOUGHT_INSTRUCTION = (
+    "\n\nIMPORTANT INSTRUCTION: "
+    "Before generating your final answer, you must perform a 'Thought Process'. "
+    "You must enclose your planning, reasoning, and analysis inside <think> and </think> tags. "
+    "Step 1: Analyze the user's request inside <think>...</think>. "
+    "Step 2: Provide the final response after the </think> tag. "
+    "Example format: <think>User wants X. I should verify Y first.</think> Here is the answer for X..."
 )
 
 @app.post("/v1/chat/completions")
 async def chat_endpoint(request: ChatRequest):
     messages = [m.dict() for m in request.messages] if request.messages else []
-    if not messages: return {"error": "No messages"}
+    if not messages:
+        return {"error": "No messages provided."}
+
+    last = messages[-1]['content']
+    intent, sentiment = get_intent(last)
 
-    last_msg = messages[-1]['content']
-    intent = get_intent(last_msg)
+    selected_model = None
 
-    # Select Model logic (Simplified for robustness)
-    selected_model = chat_model.model  # Default
-    status_msg = "Thinking..."
+    # Base system message setup with Thought Instruction
+    sys_msg = SYSTEM_IDENTITY_PREFIX + THOUGHT_INSTRUCTION
 
-    if intent == "coding" and getattr(coder_model, "model", None):
+    if intent == "coding":
+        if not getattr(coder_model, "model", None):
+            return {"error": "Coder model not available."}
         selected_model = coder_model.model
-        status_msg = "Planning Code..."
-    elif intent == "reasoning" and getattr(chat_model, "model", None):
+        sys_msg += "\nYou are an expert Coding Assistant. Plan your code logic in the thought block."
+        status_indicator = "Planning Code..."
+
+    elif intent == "reasoning":
+        if not getattr(chat_model, "model", None):
+            return {"error": "Model not available."}
+        selected_model = chat_model.model
+        sys_msg += "\nYou are a reasoning engine. Break down complex logic in the thought block first."
+        status_indicator = "Deep Thinking..."
+
+    else:  # Chat
+        if not getattr(chat_model, "model", None):
+            return {"error": "Chat model not available."}
         selected_model = chat_model.model
-        status_msg = "Deep Researching..."
+        if sentiment == "sad":
+            sys_msg += "\nBe empathic and supportive."
+            status_indicator = "Empathizing..."
+        else:
+            sys_msg += "\nYou are a helpful assistant."
+            status_indicator = "Thinking..."
 
     # Inject System Prompt
-    full_system_prompt = SYSTEM_IDENTITY + STRICT_THOUGHT_PROMPT
-
     if messages[0].get("role") != "system":
-        messages.insert(0, {"role": "system", "content": full_system_prompt})
+        messages.insert(0, {"role": "system", "content": sys_msg})
     else:
-        messages[0]["content"] += STRICT_THOUGHT_PROMPT
+        # Replace any existing system prompt so these instructions take priority
+        messages[0]["content"] = sys_msg
 
-    async def iter_response():
+    def iter_response():
         try:
-            # 1. Send Status
-            yield f"event: status\ndata: {json.dumps({'status': status_msg})}\n\n"
+            # 1. Send Status Event (triggers the dynamic indicator text update)
+            status_payload = json.dumps({"status": status_indicator})
+            yield f"event: status\ndata: {status_payload}\n\n"
 
-            # 2. Generate Stream
-            # Note: Ensure your model.create_chat_completion supports stream=True properly
+            # SSE comment line, used as a flush/keep-alive hint
+            yield ":\n\n"
+
+            # 2. Stream Model Response
             stream = selected_model.create_chat_completion(
                 messages=messages,
                 temperature=request.temperature,
                 stream=True
             )
-
+
            for chunk in stream:
-                safe_chunk = sanitize_chunk(chunk)
-                yield f"data: {json.dumps(safe_chunk)}\n\n"
-                # Small sleep to allow the event loop to breathe if needed
-                await asyncio.sleep(0)
+                safe = sanitize_chunk(chunk)
+                # Ensure we send properly formatted SSE data
+                yield f"data: {json.dumps(safe)}\n\n"
 
+            # 3. Done Marker
             yield "data: [DONE]\n\n"
 
         except Exception as e:
-            logger.error(f"Stream Error: {e}")
+            logger.exception("Streaming error: %s", e)
            yield f"data: {json.dumps({'error': str(e)})}\n\n"
+            yield "data: [DONE]\n\n"
 
     return StreamingResponse(iter_response(), media_type="text/event-stream")
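For reference, the wire format the rewritten endpoint emits is plain server-sent events: an "event: status" frame, a ":" comment line, then "data:" frames carrying JSON chunks, terminated by a literal "data: [DONE]" (now also sent after an error). A minimal client sketch using requests; the URL, port, and payload here are assumptions, not part of the commit:

import json
import requests

# Assumed local deployment; adjust host/port as needed.
url = "http://localhost:8000/v1/chat/completions"
payload = {"messages": [{"role": "user", "content": "Why is the sky blue?"}]}

with requests.post(url, json=payload, stream=True) as resp:
    event_name = None  # set when an "event:" line precedes a "data:" line
    for raw in resp.iter_lines(decode_unicode=True):
        if not raw or raw.startswith(":"):
            continue  # blank separators and SSE comment/keep-alive lines
        if raw.startswith("event:"):
            event_name = raw.split(":", 1)[1].strip()
            continue
        if raw.startswith("data:"):
            body = raw.split(":", 1)[1].strip()
            if body == "[DONE]":
                break  # end-of-stream marker from the server
            chunk = json.loads(body)
            if event_name == "status":
                print("status:", chunk.get("status"))
            else:
                print("chunk:", chunk)
            event_name = None  # SSE resets the event name per dispatch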
 
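One consumer-side consequence of THOUGHT_INSTRUCTION: every reply now carries a <think>...</think> block that a UI will likely want to fold or strip. A sketch of splitting the accumulated text, assuming the model emits at most one well-formed block as the prompt requests (the helper below is hypothetical, not part of this commit):

import re

THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)

def split_thought(full_text: str) -> tuple[str, str]:
    """Separate the <think> block from the user-facing answer.

    Falls back to treating the whole text as the answer when no
    well-formed block is present.
    """
    m = THINK_RE.search(full_text)
    if not m:
        return "", full_text.strip()
    thought = m.group(1).strip()
    answer = (full_text[:m.start()] + full_text[m.end():]).strip()
    return thought, answer

thought, answer = split_thought(
    "<think>User wants X. I should verify Y first.</think> Here is the answer for X..."
)
print("THOUGHT:", thought)
print("ANSWER:", answer)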