malek-messaoudii committed · Commit 3b2b211 · 1 Parent(s): e8aa76b

Update requirements and refactor STT and chatbot services for improved model loading and error handling

Files changed:
- requirements.txt (+2 -1)
- services/chatbot_service.py (+11 -47)
- services/stt_service.py (+58 -32)
requirements.txt
CHANGED

@@ -10,4 +10,5 @@ huggingface_hub>=0.19.0
 python-multipart
 google-genai>=0.4.0
 gtts==2.5.1
-requests==2.31.0
+requests==2.31.0
+ffmpeg-python==0.2.0
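Note that ffmpeg-python is only a Python binding around the system ffmpeg binary, so ffmpeg itself still has to be installed on the server (the refactored STT service below checks for it). As a minimal sketch of what the new dependency can do — not code from this commit; the helper name and sample paths are illustrative — it could normalize arbitrary uploads to 16 kHz mono WAV before transcription:

# Illustrative sketch, not part of this commit: ffmpeg-python shells out to
# the system ffmpeg binary, which must be installed separately.
import ffmpeg  # PyPI package name: ffmpeg-python

def normalize_to_wav(src_path: str, dst_path: str) -> str:
    """Hypothetical helper: convert any audio file to 16 kHz mono WAV."""
    ffmpeg.input(src_path).output(dst_path, ar=16000, ac=1).run(
        overwrite_output=True, quiet=True
    )
    return dst_path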
services/chatbot_service.py
CHANGED

@@ -1,25 +1,21 @@
-
+# services/chatbot_service.py (CONFIRMED WORKING VERSION)
+from transformers import pipeline
 import logging

 logger = logging.getLogger(__name__)

-# Global chatbot
+# Global chatbot pipeline
 chatbot_pipeline = None
-chat_history = {}

 def load_chatbot_model():
-    """Load
+    """Load the free chatbot model"""
     global chatbot_pipeline
     try:
-        logger.info("Loading
-
-        # Use a more reliable model
-        model_name = "microsoft/DialoGPT-small"  # More reliable than medium
+        logger.info("Loading DialoGPT chatbot model...")

         chatbot_pipeline = pipeline(
             "text-generation",
-            model=model_name,
-            tokenizer=model_name,
+            model="microsoft/DialoGPT-small",
             device="cpu"
         )
         logger.info("✅ Chatbot model loaded successfully")
@@ -44,15 +40,14 @@ async def get_chatbot_response(user_text: str, user_id: str = "default") -> str:
         # Prepare prompt
         prompt = f"User: {user_text}\nAssistant:"

-        # Generate response
+        # Generate response
         response = chatbot_pipeline(
             prompt,
-            max_new_tokens=100,
+            max_new_tokens=100,
             do_sample=True,
             temperature=0.7,
             top_p=0.9,
-            pad_token_id=chatbot_pipeline.tokenizer.eos_token_id,
-            repetition_penalty=1.1
+            pad_token_id=chatbot_pipeline.tokenizer.eos_token_id
         )

         # Extract the response
@@ -65,8 +60,6 @@ async def get_chatbot_response(user_text: str, user_id: str = "default") -> str:
         bot_response = generated_text.replace(prompt, "").strip()

         # Clean up the response
-        bot_response = clean_response(bot_response)
-
         if not bot_response:
             bot_response = get_fallback_response(user_text)

@@ -77,41 +70,12 @@ async def get_chatbot_response(user_text: str, user_id: str = "default") -> str:
         logger.error(f"❌ Chatbot response failed: {str(e)}")
         return get_fallback_response(user_text)

-
-def clean_response(response: str) -> str:
-    """Clean and format the chatbot response"""
-    if not response:
-        return ""
-
-    # Remove extra spaces
-    response = ' '.join(response.split())
-
-    # Remove any incomplete sentences at the end
-    if len(response) > 1:
-        # Ensure it ends with proper punctuation
-        if not response.endswith(('.', '!', '?')):
-            # Find the last sentence end
-            last_period = response.rfind('.')
-            last_exclamation = response.rfind('!')
-            last_question = response.rfind('?')
-            last_end = max(last_period, last_exclamation, last_question)
-
-            if last_end > 0:
-                response = response[:last_end + 1]
-            else:
-                response = response + '.'
-
-    return response.strip()
-
-
 def get_fallback_response(user_text: str) -> str:
-    """Provide
+    """Provide fallback responses"""
     fallback_responses = [
         f"I understand you said: '{user_text}'. How can I help you with that?",
         f"That's interesting! Regarding '{user_text}', what would you like to know?",
-        f"Thanks for your message about '{user_text}'. How can I assist you further?",
-        f"I heard you mention '{user_text}'. Could you tell me more about what you need?",
-        f"Regarding '{user_text}', I'd be happy to help. What specific information are you looking for?"
+        f"Thanks for your message about '{user_text}'. How can I assist you further?"
     ]

     import random
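Besides dropping the unused chat_history dict and the clean_response helper, the new pipeline call also removes repetition_penalty=1.1 and the separate tokenizer argument. For context, here is a minimal standalone sketch of the generation pattern the refactored service relies on (model name and sampling parameters are taken from the diff; the example prompt is illustrative, not from the commit):

# Standalone sketch of the pattern used above (not part of the commit).
from transformers import pipeline

chatbot = pipeline("text-generation", model="microsoft/DialoGPT-small", device="cpu")

prompt = "User: Hello there!\nAssistant:"  # illustrative prompt
out = chatbot(
    prompt,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    pad_token_id=chatbot.tokenizer.eos_token_id,
)
# The pipeline returns a list of dicts; strip the prompt to keep only the reply.
reply = out[0]["generated_text"].replace(prompt, "").strip()
print(reply)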
services/stt_service.py
CHANGED

@@ -1,29 +1,68 @@
-import
+import torch
+from transformers import pipeline
 import logging
 import tempfile
 import os
+import subprocess

 logger = logging.getLogger(__name__)

+# Global STT pipeline
+stt_pipeline = None
+
+def load_stt_model():
+    """Load the free Whisper model for speech-to-text"""
+    global stt_pipeline
+    try:
+        # Check if ffmpeg is available
+        if not check_ffmpeg():
+            logger.warning("ffmpeg not found. STT may not work properly.")
+
+        logger.info("Loading Whisper-medium STT model...")
+        stt_pipeline = pipeline(
+            "automatic-speech-recognition",
+            model="openai/whisper-medium",
+            device="cpu"
+        )
+        logger.info("✅ Whisper-medium STT model loaded successfully")
+    except Exception as e:
+        logger.error(f"❌ Failed to load Whisper-medium model: {str(e)}")
+        stt_pipeline = None
+
+def check_ffmpeg():
+    """Check if ffmpeg is available"""
+    try:
+        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
+        return True
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return False
+
 async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
     """
-    Convert audio bytes to text using
-    No ffmpeg required!
+    Convert audio bytes to text using free Whisper model.
     """
+    global stt_pipeline
+
     try:
-
+        if stt_pipeline is None:
+            load_stt_model()
+            if stt_pipeline is None:
+                raise Exception("STT model failed to load")

-        #
-
+        # Check ffmpeg again before processing
+        if not check_ffmpeg():
+            return "Error: ffmpeg is required for audio processing but is not installed. Please install ffmpeg on the server."

-
-        headers = {}
+        logger.info(f"Converting audio to text using Whisper-medium")

-        #
-
+        # Save audio bytes to temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
+            temp_audio.write(audio_bytes)
+            temp_audio_path = temp_audio.name

-
-
+        try:
+            # Transcribe using Whisper
+            result = stt_pipeline(temp_audio_path)
             transcribed_text = result.get("text", "").strip()

             if not transcribed_text:
@@ -32,26 +71,13 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
             logger.info(f"✅ STT successful: '{transcribed_text}'")
             return transcribed_text

-
-        #
-
-
-        return await fallback_stt(audio_bytes, filename)
+        finally:
+            # Clean up temporary file
+            if os.path.exists(temp_audio_path):
+                os.unlink(temp_audio_path)

     except Exception as e:
         logger.error(f"❌ STT failed: {str(e)}")
-
-
-
-async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
-    """Fallback STT using a simpler approach"""
-    try:
-        # Simple fallback that doesn't require ffmpeg
-        file_size = len(audio_bytes)
-        file_type = filename.split('.')[-1] if '.' in filename else 'unknown'
-
-        return f"Audio file '{filename}' ({file_type}, {file_size} bytes) received successfully. For full STT, please ensure ffmpeg is installed or use the Hugging Face API directly."
-
-    except Exception as e:
-        logger.error(f"Fallback STT also failed: {str(e)}")
-        return "Audio processing failed. Please try a different audio format or install ffmpeg."
+        if "ffmpeg" in str(e).lower():
+            return "Audio processing failed: ffmpeg is required but not installed. Please install ffmpeg on the server."
+        raise Exception(f"Speech-to-text conversion failed: {str(e)}")
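A minimal sketch of how the refactored service could be exercised end to end (the audio file name and the asyncio.run wiring are illustrative, and importing services as a package is assumed; load_stt_model and speech_to_text come from the diff above):

# Illustrative caller, not part of the commit.
import asyncio

from services.stt_service import load_stt_model, speech_to_text

async def main() -> None:
    load_stt_model()  # warm up the Whisper pipeline once at startup
    with open("sample.wav", "rb") as f:  # hypothetical test file
        audio_bytes = f.read()
    print(await speech_to_text(audio_bytes, "sample.wav"))

asyncio.run(main())

Loading whisper-medium on CPU is slow and memory-hungry, so keeping the pipeline in a module-level global, as the diff does, avoids reloading it on every request.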