Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

App Files Community

malek-messaoudii committed 25 days ago

Commit

d4b6133

1 Parent(s): 520a06a

Update codes

Browse files

Files changed (2) hide show

routes/audio.py +18 -5
services/stt_service.py +11 -3

routes/audio.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from fastapi import APIRouter, UploadFile, File, HTTPException
 from fastapi.responses import StreamingResponse
 import io
 from services.tts_service import generate_tts
 from services.stt_service import speech_to_text
@@ -11,6 +12,9 @@ router = APIRouter(prefix="/audio", tags=["Audio"])
 # ------------------------
 @router.post("/tts")
 async def tts(text: str):
     try:
         audio_bytes = await generate_tts(text)
     except Exception as e:
@@ -46,25 +50,34 @@ async def stt(file: UploadFile = File(...)):
         )
     try:
-        audio_bytes = await file.read()  # Read uploaded file
-        text = await speech_to_text(audio_bytes)  # Call your Gemini STT
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     return {"text": text}
 # ------------------------
 # Voice Chatbot: User sends voice → TTS reply
 # ------------------------
 @router.post("/chatbot")
 async def chatbot(file: UploadFile = File(...)):
     try:
         audio_bytes = await file.read()
-        user_text = await speech_to_text(audio_bytes)
-        # Replace with your NLP logic or chatbot response
         response_text = f"You said: {user_text}"
         audio_response = await generate_tts(response_text)

 from fastapi import APIRouter, UploadFile, File, HTTPException
 from fastapi.responses import StreamingResponse
 import io
+import mimetypes
 from services.tts_service import generate_tts
 from services.stt_service import speech_to_text
 # ------------------------
 @router.post("/tts")
 async def tts(text: str):
+    """
+    Convert text to speech and return audio.
+    """
     try:
         audio_bytes = await generate_tts(text)
     except Exception as e:
         )
     try:
+        audio_bytes = await file.read()
+        text = await speech_to_text(audio_bytes, file.filename)  # Pass filename for correct MIME
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     return {"text": text}
 # ------------------------
 # Voice Chatbot: User sends voice → TTS reply
 # ------------------------
 @router.post("/chatbot")
 async def chatbot(file: UploadFile = File(...)):
+    """
+    User sends an audio file, the system converts to text, generates response, and returns TTS audio.
+    """
+    # Validate MIME type
+    if file.content_type not in ALLOWED_AUDIO_TYPES:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Unsupported audio format: {file.content_type}. Supported: WAV, MP3, M4A"
+        )
     try:
         audio_bytes = await file.read()
+        user_text = await speech_to_text(audio_bytes, file.filename)
+        # Replace this with your NLP or chatbot logic
         response_text = f"You said: {user_text}"
         audio_response = await generate_tts(response_text)

services/stt_service.py CHANGED Viewed

@@ -1,11 +1,19 @@
 from services.gemini_client import get_gemini_client
 from google.genai import types
-async def speech_to_text(audio_bytes: bytes) -> str:
     client = get_gemini_client()
-    # Wrap audio bytes correctly
-    audio_file = types.File(data=audio_bytes, mime_type="audio/wav")
     response = client.models.generate_content(
         model="gemini-2.5-flash",

 from services.gemini_client import get_gemini_client
 from google.genai import types
+import mimetypes  # <- Add this
+async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
+    """
+    Convert audio bytes to text using Gemini API. Supports WAV, MP3, M4A.
+    """
     client = get_gemini_client()
+    # Detect MIME type from filename
+    mime_type, _ = mimetypes.guess_type(filename)
+    if mime_type is None:
+        mime_type = "audio/wav"  # fallback
+    audio_file = types.File(data=audio_bytes, mime_type=mime_type)
     response = client.models.generate_content(
         model="gemini-2.5-flash",