Nexari-Research commited on
Commit
9d13a39
·
verified ·
1 Parent(s): 0dce034

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -122
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py -- Final: Forced Thinking/Planning logic via System Prompt
2
  import os
3
  import json
4
  import logging
@@ -30,23 +30,8 @@ def ensure_model_dir_or_fail():
30
 
31
  @app.on_event("startup")
32
  async def startup_event():
33
- logger.info("Startup: ensure model dir and set base dirs...")
34
  ensure_model_dir_or_fail()
35
-
36
- router_model.BASE_DIR = os.path.join(MODEL_DIR, "router")
37
- coder_model.BASE_DIR = os.path.join(MODEL_DIR, "coder")
38
- chat_model.BASE_DIR = os.path.join(MODEL_DIR, "chat")
39
-
40
- # Load models asynchronously
41
- tasks = [
42
- asyncio.create_task(router_model.load_model_async()),
43
- asyncio.create_task(coder_model.load_model_async()),
44
- asyncio.create_task(chat_model.load_model_async()),
45
- ]
46
- results = await asyncio.gather(*tasks, return_exceptions=True)
47
- for i, r in enumerate(results):
48
- if isinstance(r, Exception):
49
- logger.error("Model loader %d failed: %s", i, r)
50
  logger.info("Startup complete.")
51
 
52
  class Message(BaseModel):
@@ -56,41 +41,16 @@ class Message(BaseModel):
56
  class ChatRequest(BaseModel):
57
  messages: list[Message]
58
  stream: bool = True
59
- temperature: float = 0.7
60
 
61
  def get_intent(last_user_message: str):
62
- # Simple keyword based routing fallback
63
- if not getattr(router_model, "model", None):
64
- text = (last_user_message or "").lower()
65
- if any(tok in text for tok in ["code", "bug", "fix", "error", "function", "python", "js", "html", "css"]):
66
- return "coding", "neutral"
67
- if any(tok in text for tok in ["why", "how", "prove", "reason", "think", "analyze"]):
68
- return "reasoning", "neutral"
69
- return "chat", "neutral"
70
-
71
- # Router model logic (if available)
72
- sys_prompt = "Analyze intent. Return JSON like {'intent':'coding'|'chat'|'reasoning', 'sentiment':'neutral'|'sad'}"
73
- try:
74
- res = router_model.model.create_chat_completion(
75
- messages=[{"role":"system","content":sys_prompt},{"role":"user","content": last_user_message}],
76
- temperature=0.1, max_tokens=50
77
- )
78
- content = ""
79
- try:
80
- content = res['choices'][0]['message']['content'].lower()
81
- except Exception:
82
- try:
83
- content = res['choices'][0]['text'].lower()
84
- except Exception:
85
- content = ""
86
-
87
- if "coding" in content: return "coding", "neutral"
88
- if "reasoning" in content: return "reasoning", "neutral"
89
- if "sad" in content: return "chat", "sad"
90
- return "chat", "neutral"
91
- except Exception as e:
92
- logger.exception("Router failure: %s", e)
93
- return "chat", "neutral"
94
 
95
  def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
96
  if isinstance(chunk, dict):
@@ -101,107 +61,74 @@ def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
101
  out[k] = v
102
  else:
103
  try:
104
- json.dumps(v)
105
- out[k] = v
106
- except Exception:
107
  out[k] = str(v)
108
  return out
109
- else:
110
- try:
111
- txt = str(chunk)
112
- return {"text": txt}
113
- except Exception:
114
- return {"text": ""}
115
-
116
- # === CORE IDENTITY & THOUGHT INSTRUCTIONS ===
117
- SYSTEM_IDENTITY_PREFIX = (
118
- "You are Nexari-G1, created by Piyush. "
119
- "Your name is Nexari-G1. "
120
- )
121
-
122
- # === MAGIC SAUCE: This forces the model to generate the <think> tags ===
123
- THOUGHT_INSTRUCTION = (
124
- "\n\nIMPORTANT INSTRUCTION: "
125
- "Before generating your final answer, you must perform a 'Thought Process'. "
126
- "You must enclose your planning, reasoning, and analysis inside <think> and </think> tags. "
127
- "Step 1: Analyze the user's request inside <think>...</think>. "
128
- "Step 2: Provide the final response after the </think> tag. "
129
- "Example format: <think>User wants X. I should verify Y first.</think> Here is the answer for X..."
130
  )
131
 
132
  @app.post("/v1/chat/completions")
133
  async def chat_endpoint(request: ChatRequest):
134
  messages = [m.dict() for m in request.messages] if request.messages else []
135
- if not messages:
136
- return {"error": "No messages provided."}
137
-
138
- last = messages[-1]['content']
139
- intent, sentiment = get_intent(last)
140
 
141
- selected_model = None
 
142
 
143
- # Base system message setup with Thought Instruction
144
- sys_msg = SYSTEM_IDENTITY_PREFIX + THOUGHT_INSTRUCTION
 
145
 
146
- if intent == "coding":
147
- if not getattr(coder_model, "model", None):
148
- return {"error":"Coder model not available."}
149
  selected_model = coder_model.model
150
- sys_msg += "\nYou are an expert Coding Assistant. Plan your code logic in the thought block."
151
- status_indicator = "Planning Code..."
152
-
153
- elif intent == "reasoning":
154
- if not getattr(chat_model, "model", None):
155
- return {"error":"Model not available."}
156
- selected_model = chat_model.model
157
- sys_msg += "\nYou are a reasoning engine. Break down complex logic in the thought block first."
158
- status_indicator = "Deep Thinking..."
159
-
160
- else: # Chat
161
- if not getattr(chat_model, "model", None):
162
- return {"error":"Chat model not available."}
163
  selected_model = chat_model.model
164
- if sentiment == "sad":
165
- sys_msg += "\nBe empathic and supportive."
166
- status_indicator = "Empathizing..."
167
- else:
168
- sys_msg += "\nYou are a helpful assistant."
169
- status_indicator = "Thinking..."
170
 
171
  # Inject System Prompt
 
 
172
  if messages[0].get("role") != "system":
173
- messages.insert(0, {"role":"system","content": sys_msg})
174
  else:
175
- # Append instruction to existing system prompt to ensure it overrides
176
- messages[0]["content"] = sys_msg
177
 
178
- def iter_response():
179
  try:
180
- # 1. Send Status Event (Triggers Dynamic Indicator text update)
181
- status_payload = json.dumps({"status": status_indicator})
182
- yield f"event: status\ndata: {status_payload}\n\n"
183
 
184
- # Small delay hint
185
- yield ":\n\n"
186
-
187
- # 2. Stream Model Response
188
  stream = selected_model.create_chat_completion(
189
  messages=messages,
190
  temperature=request.temperature,
191
  stream=True
192
  )
193
-
194
  for chunk in stream:
195
- safe = sanitize_chunk(chunk)
196
- # Ensure we send properly formatted SSE data
197
- yield f"data: {json.dumps(safe)}\n\n"
 
198
 
199
- # 3. Done Marker
200
  yield "data: [DONE]\n\n"
201
 
202
  except Exception as e:
203
- logger.exception("Streaming error: %s", e)
204
  yield f"data: {json.dumps({'error': str(e)})}\n\n"
205
- yield "data: [DONE]\n\n"
206
 
207
  return StreamingResponse(iter_response(), media_type="text/event-stream")
 
1
+ # app.py -- FINAL: Forced Thought Structure & Robust Streaming
2
  import os
3
  import json
4
  import logging
 
30
 
31
  @app.on_event("startup")
32
  async def startup_event():
 
33
  ensure_model_dir_or_fail()
34
+ # Initialize paths logic here...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  logger.info("Startup complete.")
36
 
37
  class Message(BaseModel):
 
41
  class ChatRequest(BaseModel):
42
  messages: list[Message]
43
  stream: bool = True
44
+ temperature: float = 0.6 # Slightly lower temp for better following instructions
45
 
46
  def get_intent(last_user_message: str):
47
+ # Basic intent detection
48
+ text = (last_user_message or "").lower()
49
+ if any(tok in text for tok in ["code", "python", "script", "function", "html", "css", "fix"]):
50
+ return "coding"
51
+ if any(tok in text for tok in ["why", "how", "reason", "analyze", "think"]):
52
+ return "reasoning"
53
+ return "chat"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  def sanitize_chunk(chunk: Any) -> Dict[str, Any]:
56
  if isinstance(chunk, dict):
 
61
  out[k] = v
62
  else:
63
  try:
64
+ out[k] = json.dumps(v)
65
+ except:
 
66
  out[k] = str(v)
67
  return out
68
+ return {"text": str(chunk)}
69
+
70
+ # === SYSTEM PROMPTS WITH STRICT PLANNING ===
71
+ SYSTEM_IDENTITY = "You are Nexari-G1, created by Piyush."
72
+
73
+ # Is instruction se Model hamesha pehle sochega
74
+ STRICT_THOUGHT_PROMPT = (
75
+ "\n[IMPORTANT: REASONING REQUIRED]\n"
76
+ "1. You MUST start your response with a thinking block using <think>...</think> tags.\n"
77
+ "2. Inside <think>, plan your answer, analyze the user's request, and check for errors.\n"
78
+ "3. After </think>, provide the final polite response to the user.\n"
79
+ "4. Do NOT output the thought block if the user asks for a simple greeting, but still use empty tags <think></think> to maintain format."
 
 
 
 
 
 
 
 
 
80
  )
81
 
82
  @app.post("/v1/chat/completions")
83
  async def chat_endpoint(request: ChatRequest):
84
  messages = [m.dict() for m in request.messages] if request.messages else []
85
+ if not messages: return {"error": "No messages"}
 
 
 
 
86
 
87
+ last_msg = messages[-1]['content']
88
+ intent = get_intent(last_msg)
89
 
90
+ # Select Model logic (Simplified for robustness)
91
+ selected_model = chat_model.model # Default
92
+ status_msg = "Thinking..."
93
 
94
+ if intent == "coding" and getattr(coder_model, "model", None):
 
 
95
  selected_model = coder_model.model
96
+ status_msg = "Planning Code..."
97
+ elif intent == "reasoning" and getattr(chat_model, "model", None):
 
 
 
 
 
 
 
 
 
 
 
98
  selected_model = chat_model.model
99
+ status_msg = "Deep Researching..."
 
 
 
 
 
100
 
101
  # Inject System Prompt
102
+ full_system_prompt = SYSTEM_IDENTITY + STRICT_THOUGHT_PROMPT
103
+
104
  if messages[0].get("role") != "system":
105
+ messages.insert(0, {"role": "system", "content": full_system_prompt})
106
  else:
107
+ messages[0]["content"] += STRICT_THOUGHT_PROMPT
 
108
 
109
+ async def iter_response():
110
  try:
111
+ # 1. Send Status
112
+ yield f"event: status\ndata: {json.dumps({'status': status_msg})}\n\n"
 
113
 
114
+ # 2. Generate Stream
115
+ # Note: Ensure your model.create_chat_completion supports stream=True properly
 
 
116
  stream = selected_model.create_chat_completion(
117
  messages=messages,
118
  temperature=request.temperature,
119
  stream=True
120
  )
121
+
122
  for chunk in stream:
123
+ safe_chunk = sanitize_chunk(chunk)
124
+ yield f"data: {json.dumps(safe_chunk)}\n\n"
125
+ # Small sleep to allow EventLoop to breathe if needed
126
+ await asyncio.sleep(0)
127
 
 
128
  yield "data: [DONE]\n\n"
129
 
130
  except Exception as e:
131
+ logger.error(f"Stream Error: {e}")
132
  yield f"data: {json.dumps({'error': str(e)})}\n\n"
 
133
 
134
  return StreamingResponse(iter_response(), media_type="text/event-stream")