Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

FastAPI-Backend-Models / services /stt_service.py

malek-messaoudii

Update requirements and refactor STT and chatbot services for improved model loading and error handling

3b2b211 21 days ago

2.89 kB

	import torch
	from transformers import pipeline
	import logging
	import tempfile
	import os
	import subprocess

	logger = logging.getLogger(__name__)

	# Global STT pipeline. Starts as None, is populated by load_stt_model() on
	# success and reset to None when loading fails; speech_to_text() triggers the
	# load on first use if it is still None.
	stt_pipeline = None

	def load_stt_model():
	    """Initialise the module-level Whisper speech-to-text pipeline.

	    Builds a transformers ``automatic-speech-recognition`` pipeline for the
	    ``openai/whisper-medium`` checkpoint on the CPU and stores it in the
	    global ``stt_pipeline``. A missing ffmpeg only produces a warning. Any
	    exception raised along the way is logged and leaves ``stt_pipeline`` set
	    to ``None`` instead of propagating to the caller.
	    """
	    global stt_pipeline

	    task_name = "automatic-speech-recognition"
	    checkpoint = "openai/whisper-medium"

	    try:
	        # ffmpeg is probed first so the operator sees the warning in the log
	        # before the (slow) model load starts.
	        ffmpeg_available = check_ffmpeg()
	        if not ffmpeg_available:
	            logger.warning("ffmpeg not found. STT may not work properly.")

	        logger.info("Loading Whisper-medium STT model...")
	        stt_pipeline = pipeline(task_name, model=checkpoint, device="cpu")
	        logger.info("✓ Whisper-medium STT model loaded successfully")
	    except Exception as load_error:
	        failure_reason = str(load_error)
	        logger.error(f"✗ Failed to load Whisper-medium model: {failure_reason}")
	        # Callers test for None to detect that the model is unavailable.
	        stt_pipeline = None

	def check_ffmpeg():
	    """Check whether a runnable ffmpeg binary is available.

	    Runs ``ffmpeg -version`` and treats any failure to launch or complete the
	    command as "not available", so the function always answers with a bool
	    instead of leaking launch errors to the caller.

	    Returns:
	        bool: True if the command exits with status 0, False otherwise.
	    """
	    try:
	        subprocess.run(
	            ["ffmpeg", "-version"],
	            capture_output=True,
	            check=True,
	            timeout=10,  # a version probe should be instant; never hang the caller
	        )
	    except (subprocess.SubprocessError, OSError):
	        # SubprocessError covers a non-zero exit (CalledProcessError) and a
	        # hung binary (TimeoutExpired); OSError covers a missing binary
	        # (FileNotFoundError) as well as a non-executable one
	        # (PermissionError).
	        return False
	    return True

	async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
	    """Convert audio bytes to text using the free Whisper model.

	    The audio is written to a temporary ``.wav``-suffixed file, handed to the
	    global ``stt_pipeline`` (loaded on demand), and the temporary file is
	    removed afterwards whether or not transcription succeeded.

	    Args:
	        audio_bytes: Raw audio data to transcribe.
	        filename: Name associated with the audio. Currently unused; the
	            temporary file always gets a ``.wav`` suffix.

	    Returns:
	        The stripped transcription, ``"No speech detected in the audio."``
	        when the transcription is empty, or a human-readable error message
	        when ffmpeg is missing.

	    Raises:
	        Exception: If the audio is empty, the model cannot be loaded, or
	            transcription fails for a reason that does not mention ffmpeg.
	    """
	    # Reject empty input up front rather than loading the model and creating
	    # a temporary file only to fail later in the pipeline.
	    if not audio_bytes:
	        raise Exception("Speech-to-text conversion failed: audio data is empty")

	    try:
	        if stt_pipeline is None:
	            load_stt_model()
	            if stt_pipeline is None:
	                raise Exception("STT model failed to load")

	        # Check ffmpeg again before processing
	        if not check_ffmpeg():
	            return "Error: ffmpeg is required for audio processing but is not installed. Please install ffmpeg on the server."

	        logger.info("Converting audio to text using Whisper-medium")

	        temp_audio_path = None
	        try:
	            # delete=False because the pipeline reopens the file by path; the
	            # path is recorded before writing so that the cleanup below also
	            # runs when the write itself fails (e.g. disk full).
	            with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
	                temp_audio_path = temp_audio.name
	                temp_audio.write(audio_bytes)

	            # NOTE(review): this is a synchronous call inside an async
	            # function, so the event loop is occupied while it runs -
	            # consider offloading it if that matters for the server.
	            result = stt_pipeline(temp_audio_path)
	            transcribed_text = result.get("text", "").strip()

	            if not transcribed_text:
	                transcribed_text = "No speech detected in the audio."

	            logger.info(f"✓ STT successful: '{transcribed_text}'")
	            return transcribed_text
	        finally:
	            # Clean up temporary file
	            if temp_audio_path is not None and os.path.exists(temp_audio_path):
	                os.unlink(temp_audio_path)

	    except Exception as exc:
	        reason = str(exc)
	        logger.error(f"✗ STT failed: {reason}")
	        if "ffmpeg" in reason.lower():
	            return "Audio processing failed: ffmpeg is required but not installed. Please install ffmpeg on the server."
	        # Chain explicitly so the original traceback stays attached.
	        raise Exception(f"Speech-to-text conversion failed: {reason}") from exc