malek-messaoudii
Update requirements and refactor STT and chatbot services for improved model loading and error handling
3b2b211
raw
history blame
2.89 kB
import torch
from transformers import pipeline
import logging
import tempfile
import os
import subprocess
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Global STT pipeline shared by the functions in this module. It starts as
# None, is assigned by load_stt_model(), and is reset to None if loading fails.
stt_pipeline = None
def load_stt_model():
    """Initialise the module-wide Whisper speech-to-text pipeline.

    Logs a warning when ffmpeg is unavailable, then builds a ``transformers``
    ``automatic-speech-recognition`` pipeline for ``openai/whisper-medium`` on
    the CPU and stores it in the global ``stt_pipeline``.

    Any exception raised along the way is logged and leaves ``stt_pipeline``
    set to None; nothing is re-raised, so callers must check the global.
    """
    global stt_pipeline
    try:
        # A missing ffmpeg only produces a warning here; loading still proceeds.
        ffmpeg_available = check_ffmpeg()
        if not ffmpeg_available:
            logger.warning("ffmpeg not found. STT may not work properly.")

        logger.info("Loading Whisper-medium STT model...")
        whisper = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-medium",
            device="cpu",
        )
        stt_pipeline = whisper
        logger.info("βœ“ Whisper-medium STT model loaded successfully")
    except Exception as e:
        logger.error(f"βœ— Failed to load Whisper-medium model: {str(e)}")
        # Make the failure observable to callers through the global.
        stt_pipeline = None
def check_ffmpeg():
    """Report whether the ``ffmpeg`` executable can be run.

    Runs ``ffmpeg -version`` with its output captured and treats a zero exit
    status as success.

    Returns:
        bool: True if the command started and exited with status 0; False if
        it exited non-zero or could not be started at all.
    """
    try:
        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
    except (subprocess.CalledProcessError, OSError):
        # Catch OSError rather than only FileNotFoundError: an ffmpeg entry
        # that exists on PATH but cannot be executed (e.g. PermissionError)
        # must also count as "not available" instead of propagating.
        return False
    return True
async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """Convert audio bytes to text using the free Whisper model.

    The bytes are written to a temporary file with a ``.wav`` suffix, the
    file path is passed to the global ``stt_pipeline``, and the temporary
    file is removed afterwards whether or not transcription succeeded.

    Args:
        audio_bytes: Raw audio content to transcribe.
        filename: Name associated with the audio. Currently unused by this
            function; kept for interface compatibility.

    Returns:
        The stripped transcription text; ``"No speech detected in the
        audio."`` when the transcription is empty; or an explanatory message
        string when ffmpeg is unavailable or the failure mentions ffmpeg.

    Raises:
        Exception: If the model cannot be loaded or transcription fails for a
            reason that does not mention ffmpeg. The underlying error is
            chained as ``__cause__``.
    """
    try:
        # Lazily load the model on first use.
        if stt_pipeline is None:
            load_stt_model()
        if stt_pipeline is None:
            raise Exception("STT model failed to load")

        # Check ffmpeg again before processing.
        if not check_ffmpeg():
            return "Error: ffmpeg is required for audio processing but is not installed. Please install ffmpeg on the server."

        logger.info("Converting audio to text using Whisper-medium")

        temp_audio_path = None
        try:
            # delete=False so the file survives the ``with`` block and can be
            # handed to the pipeline by path. The path is recorded before
            # writing so a failed write is still cleaned up below.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
                temp_audio_path = temp_audio.name
                temp_audio.write(audio_bytes)

            # NOTE(review): this call is synchronous inside an ``async def``
            # and will block the event loop while it runs - consider
            # offloading it if concurrency matters.
            result = stt_pipeline(temp_audio_path)
            transcribed_text = result.get("text", "").strip()
            if not transcribed_text:
                transcribed_text = "No speech detected in the audio."
            logger.info(f"βœ“ STT successful: '{transcribed_text}'")
            return transcribed_text
        finally:
            # Clean up the temporary file, including when the write failed.
            if temp_audio_path is not None and os.path.exists(temp_audio_path):
                os.unlink(temp_audio_path)
    except Exception as e:
        logger.error(f"βœ— STT failed: {str(e)}")
        if "ffmpeg" in str(e).lower():
            return "Audio processing failed: ffmpeg is required but not installed. Please install ffmpeg on the server."
        # Chain explicitly so the root cause is preserved for callers.
        raise Exception(f"Speech-to-text conversion failed: {str(e)}") from e