Nexari-Research committed
Commit 54d8f9b · verified · 1 Parent(s): 728bca0

Update app.py

Files changed (1):
  1. app.py +12 -32
app.py CHANGED

@@ -1,4 +1,3 @@
-# app.py (updated)
 import os
 import json
 import logging
@@ -72,18 +71,8 @@ def get_intent(last_user_message: str):
         messages=[{"role": "system", "content": sys_prompt}, {"role": "user", "content": last_user_message}],
         temperature=0.1, max_tokens=50
     )
-    # Some Llama bindings return the assistant message differently.
-    # Try to be robust in extracting the content.
-    content = ""
-    try:
-        content = res['choices'][0]['message']['content'].lower()
-    except Exception:
-        # fallback to other known key shapes
-        try:
-            content = res['choices'][0]['text'].lower()
-        except Exception:
-            content = ""
-
+    content = res['choices'][0]['message']['content'].lower()
+
     if "coding" in content:
         return "coding", "neutral"
     # Reasoning intent detection
@@ -149,32 +138,23 @@ async def chat_endpoint(request: ChatRequest):
     # 3. Stream Response with Status Packet
     def iter_response():
         try:
-            # === IMPROVEMENT: Send a properly-framed SSE 'status' event first ===
-            # Many clients parse SSE best when status is an explicit event and separated from token stream.
-            # This ensures the frontend's DynamicIndicator.updateText(...) sees the new status reliably.
-            status_event = f"event: status\n"
-            status_event += f"data: {json.dumps({'status': status_indicator})}\n\n"
-            yield status_event
+            # Send status packet first (frontend expects this to update its indicator)
+            yield f"data: {json.dumps({'status': status_indicator})}\n\n"
 
-            # Small keepalive / separator to reduce buffering risk
-            yield ":\n\n"
-
-            # Now start streaming model output as before (each chunk as SSE 'data: ...')
+            # Start model streaming generator
             stream = selected_model.create_chat_completion(
                 messages=messages,
                 temperature=request.temperature,
                 stream=True
             )
+
+            # **CRITICAL: send the status packet again immediately after starting the stream**
+            # This guarantees the frontend will receive the server's authoritative status even
+            # if it creates the indicator DOM slightly later (race condition on the client).
+            yield f"data: {json.dumps({'status': status_indicator})}\n\n"
+
             for chunk in stream:
-                # chunk is expected to be a dict-like SSE payload from llama-cpp
-                # send as a data: <json>\n\n line so the client JSON.parse(...) works
-                try:
-                    yield f"data: {json.dumps(chunk)}\n\n"
-                except Exception:
-                    # In case chunk contains non-serializable objects, convert safe repr
-                    yield f"data: {json.dumps({'text': str(chunk)})}\n\n"
-
-            # Final done marker
+                yield f"data: {json.dumps(chunk)}\n\n"
             yield "data: [DONE]\n\n"
         except Exception as e:
             logger.exception(f"Error while streaming: {e}")
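The get_intent hunk drops the defensive try/except extraction and indexes the llama-cpp-python chat-completion response directly. As a reference for that response shape, here is a minimal, self-contained sketch of the same routing pattern; it is not the repo's actual get_intent, and the model path, system prompt, and fallback labels are placeholders.

```python
# Minimal sketch of the intent-routing pattern, NOT the repo's get_intent.
# Assumptions: the router is a llama_cpp.Llama instance, ROUTER_MODEL_PATH and
# the system prompt are placeholders, and unmatched labels fall back to "general".
from llama_cpp import Llama

ROUTER_MODEL_PATH = "models/router.gguf"  # placeholder path

router = Llama(model_path=ROUTER_MODEL_PATH, n_ctx=2048, verbose=False)

def classify_intent(last_user_message: str) -> tuple:
    sys_prompt = "Classify the request as coding, reasoning, or general. Answer with one word."
    res = router.create_chat_completion(
        messages=[
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": last_user_message},
        ],
        temperature=0.1,
        max_tokens=50,
    )
    # Non-streaming chat completions come back as an OpenAI-style dict,
    # so the label sits at choices[0].message.content.
    content = res["choices"][0]["message"]["content"].lower()
    if "coding" in content:
        return "coding", "neutral"
    if "reasoning" in content:
        return "reasoning", "neutral"
    return "general", "neutral"
```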
 
 
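The diff shows iter_response() but not how it is returned from chat_endpoint. In a FastAPI app a generator like this is typically wrapped in a StreamingResponse with media_type "text/event-stream"; the sketch below is illustrative only, and the ChatRequest fields, route path, and dummy token loop are assumptions rather than the repo's code.

```python
# Illustrative wiring only, not the repo's chat_endpoint. ChatRequest fields,
# the route path, and the dummy token loop are assumptions.
import json

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

class ChatRequest(BaseModel):
    messages: list
    temperature: float = 0.7

@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    status_indicator = "Thinking..."  # placeholder status text

    def iter_response():
        # Status packet first, then token frames, then the [DONE] marker,
        # each framed as an SSE "data:" line.
        yield f"data: {json.dumps({'status': status_indicator})}\n\n"
        for token in ("Hello", " world"):  # stand-in for the model stream
            yield f"data: {json.dumps({'text': token})}\n\n"
        yield "data: [DONE]\n\n"

    # text/event-stream marks the response as SSE so clients read it incrementally.
    return StreamingResponse(iter_response(), media_type="text/event-stream")
```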
 
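On the client side, every SSE frame is a "data:" line carrying either the status packet, a forwarded model chunk, or the [DONE] marker. A hedged consumption sketch, assuming the endpoint URL and payload shown here and assuming the forwarded chunks follow llama-cpp-python's OpenAI-style delta layout:

```python
# Hedged client sketch. The URL and payload are placeholders; the chunk layout
# assumes llama-cpp-python's OpenAI-style "delta" chunks forwarded verbatim.
import json
import requests

URL = "http://localhost:7860/chat"  # placeholder endpoint
payload = {"messages": [{"role": "user", "content": "Hi"}], "temperature": 0.7}

with requests.post(URL, json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip blank separators and keepalive comments
        data = line[len("data: "):]
        if data == "[DONE]":
            break
        packet = json.loads(data)
        if "status" in packet:
            print(f"[status] {packet['status']}")          # the status packet
        else:
            delta = packet["choices"][0].get("delta", {})  # a model chunk
            print(delta.get("content", ""), end="", flush=True)
```

Because the server frames the status packet on the same "data:" channel as the token chunks, a client only needs this one parsing path to handle status updates, streamed text, and the end-of-stream marker.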