Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

App Files Community

malek-messaoudii committed on 26 days ago

Commit

544d113

1 Parent(s): 4f1c42b

Correct files

Browse files

Files changed (5) hide show

models/audio.py +0 -2
routes/audio.py +28 -10
services/gemini_client.py +1 -1
services/stt_service.py +2 -2
services/tts_service.py +1 -4

models/audio.py CHANGED Viewed

@@ -23,7 +23,6 @@ class STTResponse(BaseModel):
         description="Approximate audio duration in seconds"
     )
 # ==============================
 # TEXT TO SPEECH REQUEST / RESPONSE
 # ==============================
@@ -33,7 +32,6 @@ class TTSRequest(BaseModel):
     )
     text: str = Field(..., min_length=1, max_length=500, description="Text to convert to speech")
 class TTSResponse(BaseModel):
     model_config = ConfigDict(
         json_schema_extra={

         description="Approximate audio duration in seconds"
     )
 # ==============================
 # TEXT TO SPEECH REQUEST / RESPONSE
 # ==============================
     )
     text: str = Field(..., min_length=1, max_length=500, description="Text to convert to speech")
 class TTSResponse(BaseModel):
     model_config = ConfigDict(
         json_schema_extra={

routes/audio.py CHANGED Viewed

@@ -1,14 +1,14 @@
 from fastapi import APIRouter, UploadFile, File, HTTPException
-from services.tts_service import generate_tts
-from services.stt_service import speech_to_text
 from fastapi.responses import StreamingResponse
 import io
 router = APIRouter(prefix="/audio", tags=["Audio"])
-# ======================
-# TEXT TO SPEECH
-# ======================
 @router.post("/tts")
 async def tts(text: str):
     try:
@@ -16,13 +16,12 @@ async def tts(text: str):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
-    # Return as streaming response without saving file
     return StreamingResponse(io.BytesIO(audio_bytes), media_type="audio/wav")
-# ======================
-# SPEECH TO TEXT
-# ======================
 @router.post("/stt")
 async def stt(file: UploadFile = File(...)):
     try:
@@ -30,5 +29,24 @@ async def stt(file: UploadFile = File(...)):
         text = await speech_to_text(audio_bytes)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     return {"text": text}

 from fastapi import APIRouter, UploadFile, File, HTTPException
 from fastapi.responses import StreamingResponse
 import io
+from services.tts_service import generate_tts
+from services.stt_service import speech_to_text
 router = APIRouter(prefix="/audio", tags=["Audio"])
+# ------------------------
+# Text to Speech
+# ------------------------
 @router.post("/tts")
 async def tts(text: str):
     try:
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     return StreamingResponse(io.BytesIO(audio_bytes), media_type="audio/wav")
+# ------------------------
+# Speech to Text
+# ------------------------
 @router.post("/stt")
 async def stt(file: UploadFile = File(...)):
     try:
         text = await speech_to_text(audio_bytes)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     return {"text": text}
+# ------------------------
+# Voice Chatbot: User sends voice → TTS reply
+# ------------------------
+@router.post("/chatbot")
+async def chatbot(file: UploadFile = File(...)):
+    try:
+        audio_bytes = await file.read()
+        user_text = await speech_to_text(audio_bytes)
+        # Replace with your NLP logic or chatbot response
+        response_text = f"You said: {user_text}"
+        audio_response = await generate_tts(response_text)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+    return StreamingResponse(io.BytesIO(audio_response), media_type="audio/wav")

services/gemini_client.py CHANGED Viewed

@@ -2,7 +2,7 @@ from google.genai import Client
 import os
 def get_gemini_client():
-    api_key = os.getenv("GOOGLE_GENAI_API_KEY")  # store it in .env
     if not api_key:
         raise ValueError("Missing GOOGLE_GENAI_API_KEY environment variable")
     return Client(api_key=api_key)

 import os
 def get_gemini_client():
+    api_key = os.getenv("GOOGLE_GENAI_API_KEY")
     if not api_key:
         raise ValueError("Missing GOOGLE_GENAI_API_KEY environment variable")
     return Client(api_key=api_key)

services/stt_service.py CHANGED Viewed

@@ -4,12 +4,12 @@ from google.genai import types
 async def speech_to_text(audio_bytes: bytes) -> str:
     client = get_gemini_client()
-    # Correctly wrap audio bytes using types.File
     audio_file = types.File(data=audio_bytes, mime_type="audio/wav")
     response = client.models.generate_content(
         model="gemini-2.5-flash",
-        contents=[audio_file]  # <-- pass as a list of types.File
     )
     return response.text

 async def speech_to_text(audio_bytes: bytes) -> str:
     client = get_gemini_client()
+    # Wrap audio bytes correctly
     audio_file = types.File(data=audio_bytes, mime_type="audio/wav")
     response = client.models.generate_content(
         model="gemini-2.5-flash",
+        contents=[audio_file]
     )
     return response.text

services/tts_service.py CHANGED Viewed

@@ -3,9 +3,6 @@ from google.genai import types
 import base64
 async def generate_tts(text: str) -> bytes:
-    """
-    Convert text to speech using Gemini API and return valid WAV bytes
-    """
     client = get_gemini_client()
     response = client.models.generate_content(
@@ -21,7 +18,7 @@ async def generate_tts(text: str) -> bytes:
         ),
     )
-    # Gemini returns base64 audio, decode it to bytes
     audio_base64 = response.candidates[0].content.parts[0].inline_data.data
     audio_bytes = base64.b64decode(audio_base64)

 import base64
 async def generate_tts(text: str) -> bytes:
     client = get_gemini_client()
     response = client.models.generate_content(
         ),
     )
+    # Decode base64 audio into bytes
     audio_base64 = response.candidates[0].content.parts[0].inline_data.data
     audio_bytes = base64.b64decode(audio_base64)