Nexari-Research committed
Commit 54d8f9b · verified · 1 Parent(s): 728bca0

Update app.py

Files changed (1):
  1. app.py +12 -32
app.py CHANGED

@@ -1,4 +1,3 @@
-# app.py (updated)
 import os
 import json
 import logging
@@ -72,18 +71,8 @@ def get_intent(last_user_message: str):
         messages=[{"role": "system", "content": sys_prompt}, {"role": "user", "content": last_user_message}],
         temperature=0.1, max_tokens=50
     )
-    # Some Llama bindings return the assistant message differently.
-    # Try to be robust in extracting the content.
-    content = ""
-    try:
-        content = res['choices'][0]['message']['content'].lower()
-    except Exception:
-        # fallback to other known key shapes
-        try:
-            content = res['choices'][0]['text'].lower()
-        except Exception:
-            content = ""
-
+    content = res['choices'][0]['message']['content'].lower()
+
     if "coding" in content:
         return "coding", "neutral"
     # Reasoning intent detection
@@ -149,32 +138,23 @@ async def chat_endpoint(request: ChatRequest):
     # 3. Stream Response with Status Packet
     def iter_response():
         try:
-            # === IMPROVEMENT: Send a properly-framed SSE 'status' event first ===
-            # Many clients parse SSE best when status is an explicit event and separated from token stream.
-            # This ensures the frontend's DynamicIndicator.updateText(...) sees the new status reliably.
-            status_event = f"event: status\n"
-            status_event += f"data: {json.dumps({'status': status_indicator})}\n\n"
-            yield status_event
+            # Send status packet first (frontend expects this to update its indicator)
+            yield f"data: {json.dumps({'status': status_indicator})}\n\n"
 
-            # Small keepalive / separator to reduce buffering risk
-            yield ":\n\n"
-
-            # Now start streaming model output as before (each chunk as SSE 'data: ...')
+            # Start model streaming generator
             stream = selected_model.create_chat_completion(
                 messages=messages,
                 temperature=request.temperature,
                 stream=True
             )
+
+            # **CRITICAL: send the status packet again immediately after starting the stream**
+            # This guarantees the frontend will receive the server's authoritative status even
+            # if it creates the indicator DOM slightly later (race condition on the client).
+            yield f"data: {json.dumps({'status': status_indicator})}\n\n"
+
             for chunk in stream:
-                # chunk is expected to be a dict-like SSE payload from llama-cpp
-                # send as a data: <json>\n\n line so the client JSON.parse(...) works
-                try:
-                    yield f"data: {json.dumps(chunk)}\n\n"
-                except Exception:
-                    # In case chunk contains non-serializable objects, convert safe repr
-                    yield f"data: {json.dumps({'text': str(chunk)})}\n\n"
-
-            # Final done marker
+                yield f"data: {json.dumps(chunk)}\n\n"
             yield "data: [DONE]\n\n"
         except Exception as e:
             logger.exception(f"Error while streaming: {e}")
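The get_intent hunk drops the defensive try/except extraction and indexes the llama-cpp-python chat-completion response directly. As a reference for that response shape, here is a minimal, self-contained sketch of the same routing pattern; it is not the repo's actual get_intent, and the model path, system prompt, and fallback labels are placeholders.

```python
# Minimal sketch of the intent-routing pattern, NOT the repo's get_intent.
# Assumptions: the router is a llama_cpp.Llama instance, ROUTER_MODEL_PATH and
# the system prompt are placeholders, and unmatched labels fall back to "general".
from llama_cpp import Llama

ROUTER_MODEL_PATH = "models/router.gguf"  # placeholder path

router = Llama(model_path=ROUTER_MODEL_PATH, n_ctx=2048, verbose=False)

def classify_intent(last_user_message: str) -> tuple:
    sys_prompt = "Classify the request as coding, reasoning, or general. Answer with one word."
    res = router.create_chat_completion(
        messages=[
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": last_user_message},
        ],
        temperature=0.1,
        max_tokens=50,
    )
    # Non-streaming chat completions come back as an OpenAI-style dict,
    # so the label sits at choices[0].message.content.
    content = res["choices"][0]["message"]["content"].lower()
    if "coding" in content:
        return "coding", "neutral"
    if "reasoning" in content:
        return "reasoning", "neutral"
    return "general", "neutral"
```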
 
 
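The diff shows iter_response() but not how it is returned from chat_endpoint. In a FastAPI app a generator like this is typically wrapped in a StreamingResponse with media_type "text/event-stream"; the sketch below is illustrative only, and the ChatRequest fields, route path, and dummy token loop are assumptions rather than the repo's code.

```python
# Illustrative wiring only, not the repo's chat_endpoint. ChatRequest fields,
# the route path, and the dummy token loop are assumptions.
import json

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

class ChatRequest(BaseModel):
    messages: list
    temperature: float = 0.7

@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    status_indicator = "Thinking..."  # placeholder status text

    def iter_response():
        # Status packet first, then token frames, then the [DONE] marker,
        # each framed as an SSE "data:" line.
        yield f"data: {json.dumps({'status': status_indicator})}\n\n"
        for token in ("Hello", " world"):  # stand-in for the model stream
            yield f"data: {json.dumps({'text': token})}\n\n"
        yield "data: [DONE]\n\n"

    # text/event-stream marks the response as SSE so clients read it incrementally.
    return StreamingResponse(iter_response(), media_type="text/event-stream")
```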
 
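On the client side, every SSE frame is a "data:" line carrying either the status packet, a forwarded model chunk, or the [DONE] marker. A hedged consumption sketch, assuming the endpoint URL and payload shown here and assuming the forwarded chunks follow llama-cpp-python's OpenAI-style delta layout:

```python
# Hedged client sketch. The URL and payload are placeholders; the chunk layout
# assumes llama-cpp-python's OpenAI-style "delta" chunks forwarded verbatim.
import json
import requests

URL = "http://localhost:7860/chat"  # placeholder endpoint
payload = {"messages": [{"role": "user", "content": "Hi"}], "temperature": 0.7}

with requests.post(URL, json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip blank separators and keepalive comments
        data = line[len("data: "):]
        if data == "[DONE]":
            break
        packet = json.loads(data)
        if "status" in packet:
            print(f"[status] {packet['status']}")          # the status packet
        else:
            delta = packet["choices"][0].get("delta", {})  # a model chunk
            print(delta.get("content", ""), end="", flush=True)
```

Because the server frames the status packet on the same "data:" channel as the token chunks, a client only needs this one parsing path to handle status updates, streamed text, and the end-of-stream marker.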