Update app.py
app.py CHANGED
@@ -1,4 +1,3 @@
-# app.py (updated)
 import os
 import json
 import logging
@@ -72,18 +71,8 @@ def get_intent(last_user_message: str):
         messages=[{"role": "system", "content": sys_prompt}, {"role": "user", "content": last_user_message}],
         temperature=0.1, max_tokens=50
     )
-
-
-    content = ""
-    try:
-        content = res['choices'][0]['message']['content'].lower()
-    except Exception:
-        # fallback to other known key shapes
-        try:
-            content = res['choices'][0]['text'].lower()
-        except Exception:
-            content = ""
-
+    content = res['choices'][0]['message']['content'].lower()
+
     if "coding" in content:
         return "coding", "neutral"
     # Reasoning intent detection
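Note on the get_intent() change above: the removed try/except ladder guarded against two possible response shapes, while the simplified one-liner assumes the OpenAI-style chat-completion dict (the shape llama-cpp-python's create_chat_completion returns when not streaming). A minimal sketch of that assumption, with illustrative values:

    # Assumed non-streaming response shape (values are illustrative only):
    res = {
        "choices": [
            {"index": 0, "message": {"role": "assistant", "content": "Coding"}}
        ]
    }
    content = res['choices'][0]['message']['content'].lower()
    assert "coding" in content  # get_intent() would return ("coding", "neutral")

If the backend ever returned the legacy completion shape (choices[0]['text']), the new line would raise KeyError; that is exactly the case the removed fallback handled, so the simplification trades robustness for clarity.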
@@ -149,32 +138,23 @@ async def chat_endpoint(request: ChatRequest):
     # 3. Stream Response with Status Packet
     def iter_response():
         try:
-            #
-
-            # This ensures the frontend's DynamicIndicator.updateText(...) sees the new status reliably.
-            status_event = f"event: status\n"
-            status_event += f"data: {json.dumps({'status': status_indicator})}\n\n"
-            yield status_event
+            # Send status packet first (frontend expects this to update its indicator)
+            yield f"data: {json.dumps({'status': status_indicator})}\n\n"
 
-            #
-            yield ":\n\n"
-
-            # Now start streaming model output as before (each chunk as SSE 'data: ...')
+            # Start model streaming generator
             stream = selected_model.create_chat_completion(
                 messages=messages,
                 temperature=request.temperature,
                 stream=True
             )
+
+            # **CRITICAL: send the status packet again immediately after starting the stream**
+            # This guarantees the frontend will receive the server's authoritative status even
+            # if it creates the indicator DOM slightly later (race condition on the client).
+            yield f"data: {json.dumps({'status': status_indicator})}\n\n"
+
             for chunk in stream:
-
-                # send as a data: <json>\n\n line so the client JSON.parse(...) works
-                try:
-                    yield f"data: {json.dumps(chunk)}\n\n"
-                except Exception:
-                    # In case chunk contains non-serializable objects, convert safe repr
-                    yield f"data: {json.dumps({'text': str(chunk)})}\n\n"
-
-                # Final done marker
+                yield f"data: {json.dumps(chunk)}\n\n"
             yield "data: [DONE]\n\n"
         except Exception as e:
             logger.exception(f"Error while streaming: {e}")
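For reference, a minimal consumer of this stream, written against the data: <json>\n\n framing that iter_response() emits. The endpoint URL and request payload below are hypothetical, and the chunk layout is assumed to be the OpenAI-style streaming delta that create_chat_completion(stream=True) yields; treat this as a sketch, not part of the commit:

    import json
    import requests

    def consume_chat_stream(url: str, payload: dict) -> None:
        # POST with stream=True so requests exposes the SSE body incrementally.
        with requests.post(url, json=payload, stream=True) as resp:
            resp.raise_for_status()
            for raw in resp.iter_lines(decode_unicode=True):
                if not raw or not raw.startswith("data: "):
                    continue  # skip the blank separators between SSE frames
                data = raw[len("data: "):]
                if data == "[DONE]":
                    break  # final done marker emitted by iter_response()
                event = json.loads(data)
                if "status" in event:
                    # status packet: what the web client feeds to its indicator
                    print(f"[status] {event['status']}")
                else:
                    # assumed OpenAI-style chunk: choices[0]['delta']['content']
                    delta = event.get("choices", [{}])[0].get("delta", {})
                    print(delta.get("content", ""), end="", flush=True)

Since the server now sends the status packet twice, a consumer like this simply applies it twice; setting the indicator to the same text is idempotent, so the duplicate costs nothing on the client.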