malek-messaoudii committed on
Commit
a8c8142
·
1 Parent(s): 47a3efb

fix errors

Browse files
Files changed (2) hide show
  1. routes/audio.py +8 -22
  2. services/stt_service.py +33 -28
routes/audio.py CHANGED
@@ -17,20 +17,16 @@ router = APIRouter(prefix="/audio", tags=["Audio"])
17
  async def startup_event():
18
  """Load models when the router starts"""
19
  logger.info("Loading free STT and Chatbot models...")
20
- load_stt_model()
21
- load_chatbot_model()
22
-
 
 
 
23
 
 
24
  @router.post("/tts")
25
  async def tts(request: TTSRequest):
26
- """
27
- Convert text to speech and return audio file using free gTTS.
28
-
29
- Example:
30
- - POST /audio/tts
31
- - Body: {"text": "Hello, welcome to our system"}
32
- - Returns: MP3 audio file
33
- """
34
  try:
35
  logger.info(f"TTS request received for text: '{request.text}'")
36
  audio_bytes = await generate_tts(request.text)
@@ -39,17 +35,8 @@ async def tts(request: TTSRequest):
39
  logger.error(f"TTS error: {str(e)}")
40
  raise HTTPException(status_code=500, detail=str(e))
41
 
42
-
43
  @router.post("/stt", response_model=STTResponse)
44
  async def stt(file: UploadFile = File(...)):
45
- """
46
- Convert audio file to text using free Whisper model.
47
-
48
- Example:
49
- - POST /audio/stt
50
- - File: audio.mp3 (or .wav, .m4a)
51
- - Returns: {"text": "transcribed text", "model_name": "whisper-small", ...}
52
- """
53
  # Validate file type
54
  if file.content_type not in ALLOWED_AUDIO_TYPES:
55
  raise HTTPException(
@@ -72,7 +59,7 @@ async def stt(file: UploadFile = File(...)):
72
 
73
  return STTResponse(
74
  text=text,
75
- model_name="whisper-small",
76
  language="en",
77
  duration_seconds=None
78
  )
@@ -80,7 +67,6 @@ async def stt(file: UploadFile = File(...)):
80
  logger.error(f"STT error: {str(e)}")
81
  raise HTTPException(status_code=500, detail=str(e))
82
 
83
-
84
  @router.post("/chatbot")
85
  async def chatbot_voice(file: UploadFile = File(...)):
86
  """
 
17
async def startup_event():
    """Load the STT and chatbot models when the router starts.

    Failures are logged but deliberately not re-raised, so the API can
    still serve routes that do not depend on the models.
    """
    logger.info("Loading free STT and Chatbot models...")
    try:
        load_stt_model()
        load_chatbot_model()
        logger.info("✓ Models loaded successfully")
    except Exception as e:
        # logger.exception records the full traceback; a bare error
        # message loses the context needed to debug a failed model load.
        logger.exception(f"✗ Model loading failed: {str(e)}")
26
 
27
+ # ... rest of your routes remain the same ...
28
  @router.post("/tts")
29
  async def tts(request: TTSRequest):
 
 
 
 
 
 
 
 
30
  try:
31
  logger.info(f"TTS request received for text: '{request.text}'")
32
  audio_bytes = await generate_tts(request.text)
 
35
  logger.error(f"TTS error: {str(e)}")
36
  raise HTTPException(status_code=500, detail=str(e))
37
 
 
38
  @router.post("/stt", response_model=STTResponse)
39
  async def stt(file: UploadFile = File(...)):
 
 
 
 
 
 
 
 
40
  # Validate file type
41
  if file.content_type not in ALLOWED_AUDIO_TYPES:
42
  raise HTTPException(
 
59
 
60
  return STTResponse(
61
  text=text,
62
+ model_name="whisper-medium",
63
  language="en",
64
  duration_seconds=None
65
  )
 
67
  logger.error(f"STT error: {str(e)}")
68
  raise HTTPException(status_code=500, detail=str(e))
69
 
 
70
  @router.post("/chatbot")
71
  async def chatbot_voice(file: UploadFile = File(...)):
72
  """
services/stt_service.py CHANGED
@@ -1,13 +1,32 @@
1
- import requests
 
2
  import logging
3
  import tempfile
4
  import os
5
 
6
  logger = logging.getLogger(__name__)
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
9
  """
10
- Convert audio bytes to text using Hugging Face Inference API (free).
11
 
12
  Args:
13
  audio_bytes: Raw audio file bytes
@@ -16,8 +35,15 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
16
  Returns:
17
  Transcribed text
18
  """
 
 
19
  try:
20
- logger.info(f"Converting audio to text using Hugging Face API")
 
 
 
 
 
21
 
22
  # Save audio bytes to temporary file
23
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
@@ -25,20 +51,10 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
25
  temp_audio_path = temp_audio.name
26
 
27
  try:
28
- # Use Hugging Face Inference API (free)
29
- API_URL = "https://api-inference.huggingface.co/models/openai/whisper-medium"
30
- headers = {"Authorization": "Bearer YOUR_HF_TOKEN"} # Optional for free tier
31
 
32
- with open(temp_audio_path, "rb") as f:
33
- response = requests.post(API_URL, headers=headers, data=f)
34
-
35
- if response.status_code == 200:
36
- result = response.json()
37
- transcribed_text = result.get("text", "").strip()
38
- else:
39
- # Fallback to local model if API fails
40
- transcribed_text = await fallback_stt(audio_bytes, filename)
41
-
42
  if not transcribed_text:
43
  transcribed_text = "Sorry, I couldn't understand the audio."
44
 
@@ -52,15 +68,4 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
52
 
53
  except Exception as e:
54
  logger.error(f"✗ STT failed: {str(e)}")
55
- return "Sorry, there was an error processing your audio."
56
-
57
-
58
- async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
59
- """Fallback STT using a simpler approach"""
60
- try:
61
- # Simple fallback - you could implement a basic speech recognition here
62
- # For now, return a placeholder
63
- return "Audio received but transcription service is temporarily unavailable."
64
- except Exception as e:
65
- logger.error(f"Fallback STT also failed: {str(e)}")
66
- return "Audio processing failed."
 
1
+ import torch
2
+ from transformers import pipeline
3
  import logging
4
  import tempfile
5
  import os
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
# Module-level ASR pipeline, shared by the STT helpers in this file.
stt_pipeline = None


def load_stt_model():
    """Initialise the global Whisper-medium speech-to-text pipeline.

    On any failure the pipeline is reset to None and the error is
    logged; callers are expected to check for None before using it.
    """
    global stt_pipeline
    try:
        logger.info("Loading Whisper-medium STT model...")
        stt_pipeline = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-medium",
            # NOTE(review): pinned to CPU — presumably for free-tier
            # hosting without a GPU; confirm before changing.
            device="cpu",
        )
    except Exception as e:
        logger.error(f"✗ Failed to load Whisper-medium model: {str(e)}")
        stt_pipeline = None
    else:
        logger.info("✓ Whisper-medium STT model loaded successfully")
27
async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """
    Convert audio bytes to text using the free Whisper model.

    Args:
        audio_bytes: Raw audio file bytes
        filename: Original filename (kept for diagnostics)

    Returns:
        Transcribed text

    Raises:
        Exception: if the model cannot be loaded or transcription fails.
    """
    global stt_pipeline

    try:
        # Lazily (re)load the model in case the startup hook did not run
        # or a previous load attempt failed.
        if stt_pipeline is None:
            load_stt_model()
            if stt_pipeline is None:
                raise Exception("STT model failed to load")

        logger.info(f"Converting audio to text using Whisper-medium")

        # The pipeline reads from a file path, so persist the bytes to a
        # temporary file first (delete=False: the pipeline opens it after
        # this context manager closes the handle).
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
            temp_audio.write(audio_bytes)
            temp_audio_path = temp_audio.name

        try:
            # Transcribe using Whisper
            result = stt_pipeline(temp_audio_path)
            transcribed_text = result.get("text", "").strip()

            if not transcribed_text:
                transcribed_text = "Sorry, I couldn't understand the audio."

            return transcribed_text
        finally:
            # Always remove the temp file, even when transcription fails.
            # NOTE(review): the cleanup/return lines were elided in the
            # diff view — confirm against the full file.
            if os.path.exists(temp_audio_path):
                os.unlink(temp_audio_path)

    except Exception as e:
        logger.error(f"✗ STT failed: {str(e)}")
        # Chain the original exception so the root cause survives the
        # re-raise (the visible original dropped it).
        raise Exception(f"Speech-to-text conversion failed: {str(e)}") from e