Commit 3b2b211 by malek-messaoudii: Update requirements and refactor STT and chatbot services for improved model loading and error handling
import torch
from transformers import pipeline
import logging
import tempfile
import os
import subprocess

logger = logging.getLogger(__name__)

# Global STT pipeline
stt_pipeline = None


def load_stt_model():
    """Load the free Whisper model for speech-to-text"""
    global stt_pipeline
    try:
        # Check if ffmpeg is available
        if not check_ffmpeg():
            logger.warning("ffmpeg not found. STT may not work properly.")

        logger.info("Loading Whisper-medium STT model...")
        stt_pipeline = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-medium",
            device="cpu"
        )
        logger.info("Whisper-medium STT model loaded successfully")
    except Exception as e:
        logger.error(f"Failed to load Whisper-medium model: {str(e)}")
        stt_pipeline = None


def check_ffmpeg():
    """Check if ffmpeg is available"""
    try:
        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
        return True
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False


async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """
    Convert audio bytes to text using the free Whisper model.
    """
    global stt_pipeline
    try:
        if stt_pipeline is None:
            load_stt_model()
        if stt_pipeline is None:
            raise Exception("STT model failed to load")

        # Check ffmpeg again before processing
        if not check_ffmpeg():
            return "Error: ffmpeg is required for audio processing but is not installed. Please install ffmpeg on the server."

        logger.info("Converting audio to text using Whisper-medium")

        # Save audio bytes to a temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
            temp_audio.write(audio_bytes)
            temp_audio_path = temp_audio.name

        try:
            # Transcribe using Whisper
            result = stt_pipeline(temp_audio_path)
            transcribed_text = result.get("text", "").strip()
            if not transcribed_text:
                transcribed_text = "No speech detected in the audio."
            logger.info(f"STT successful: '{transcribed_text}'")
            return transcribed_text
        finally:
            # Clean up the temporary file
            if os.path.exists(temp_audio_path):
                os.unlink(temp_audio_path)
    except Exception as e:
        logger.error(f"STT failed: {str(e)}")
        if "ffmpeg" in str(e).lower():
            return "Audio processing failed: ffmpeg is required but not installed. Please install ffmpeg on the server."
        raise Exception(f"Speech-to-text conversion failed: {str(e)}")
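The module keeps the Whisper pipeline in a module-level global and loads it lazily the first time speech_to_text is called. A minimal sketch of how a caller might wire this up, assuming a FastAPI app, a route path of /api/stt, and a module named stt_service (all assumptions, not shown in this commit):

    # Illustrative only: the app structure, route path, and module name are assumptions.
    import logging

    from fastapi import FastAPI, File, HTTPException, UploadFile

    from stt_service import load_stt_model, speech_to_text  # module name is an assumption

    logger = logging.getLogger(__name__)
    app = FastAPI()

    @app.on_event("startup")
    def warm_up_stt() -> None:
        # Load Whisper once at startup so the first request does not pay the download/load cost.
        load_stt_model()

    @app.post("/api/stt")
    async def transcribe(audio: UploadFile = File(...)) -> dict:
        audio_bytes = await audio.read()
        try:
            text = await speech_to_text(audio_bytes, audio.filename or "audio.wav")
        except Exception as exc:
            logger.error("Transcription failed: %s", exc)
            raise HTTPException(status_code=500, detail=str(exc))
        return {"text": text}

Warming the model at startup is optional; without it, the lazy load in speech_to_text still works, but the first request blocks while whisper-medium is fetched and initialized.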