Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

App Files Files Community

malek-messaoudii commited on 21 days ago

Commit

47a3efb

1 Parent(s): 95cb26e

update stt

Browse files

Files changed (2) hide show

requirements.txt +1 -1
services/stt_service.py +28 -34

requirements.txt CHANGED Viewed

@@ -10,4 +10,4 @@ huggingface_hub>=0.19.0
 python-multipart
 google-genai>=0.4.0
 gtts==2.5.1

 python-multipart
 google-genai>=0.4.0
 gtts==2.5.1
+requests==2.31.0

services/stt_service.py CHANGED Viewed

@@ -1,33 +1,13 @@
-import torch
-import torchaudio
-from transformers import pipeline
 import logging
 import tempfile
 import os
 logger = logging.getLogger(__name__)
-# Global STT pipeline
-stt_pipeline = None
-def load_stt_model():
-    """Load the free Whisper model for speech-to-text"""
-    global stt_pipeline
-    try:
-        logger.info("Loading Whisper STT model...")
-        stt_pipeline = pipeline(
-            "automatic-speech-recognition",
-            model="openai/whisper-small",  # Free model
-            device="cpu"  # Use CPU to avoid GPU requirements
-        )
-        logger.info("✓ Whisper STT model loaded successfully")
-    except Exception as e:
-        logger.error(f"✗ Failed to load Whisper model: {str(e)}")
-        stt_pipeline = None
 async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
     """
-    Convert audio bytes to text using free Whisper model.
     Args:
         audio_bytes: Raw audio file bytes
@@ -36,15 +16,8 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
     Returns:
         Transcribed text
     """
-    global stt_pipeline
     try:
-        if stt_pipeline is None:
-            load_stt_model()
-            if stt_pipeline is None:
-                raise Exception("STT model failed to load")
-        logger.info(f"Converting audio to text using Whisper")
         # Save audio bytes to temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
@@ -52,10 +25,20 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
             temp_audio_path = temp_audio.name
         try:
-            # Transcribe using Whisper
-            result = stt_pipeline(temp_audio_path)
-            transcribed_text = result.get("text", "").strip()
             if not transcribed_text:
                 transcribed_text = "Sorry, I couldn't understand the audio."
@@ -69,4 +52,15 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
     except Exception as e:
         logger.error(f"✗ STT failed: {str(e)}")
-        raise Exception(f"Speech-to-text conversion failed: {str(e)}")

+import requests
 import logging
 import tempfile
 import os
 logger = logging.getLogger(__name__)
 async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
     """
+    Convert audio bytes to text using Hugging Face Inference API (free).
     Args:
         audio_bytes: Raw audio file bytes
     Returns:
         Transcribed text
     """
     try:
+        logger.info(f"Converting audio to text using Hugging Face API")
         # Save audio bytes to temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
             temp_audio_path = temp_audio.name
         try:
+            # Use Hugging Face Inference API (free)
+            API_URL = "https://api-inference.huggingface.co/models/openai/whisper-medium"
+            headers = {"Authorization": "Bearer YOUR_HF_TOKEN"}  # Optional for free tier
+            with open(temp_audio_path, "rb") as f:
+                response = requests.post(API_URL, headers=headers, data=f)
+            if response.status_code == 200:
+                result = response.json()
+                transcribed_text = result.get("text", "").strip()
+            else:
+                # Fallback to local model if API fails
+                transcribed_text = await fallback_stt(audio_bytes, filename)
             if not transcribed_text:
                 transcribed_text = "Sorry, I couldn't understand the audio."
     except Exception as e:
         logger.error(f"✗ STT failed: {str(e)}")
+        return "Sorry, there was an error processing your audio."
+async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
+    """Fallback STT using a simpler approach"""
+    try:
+        # Simple fallback - you could implement a basic speech recognition here
+        # For now, return a placeholder
+        return "Audio received but transcription service is temporarily unavailable."
+    except Exception as e:
+        logger.error(f"Fallback STT also failed: {str(e)}")
+        return "Audio processing failed."