malek-messaoudii committed
Commit: a8c8142
Parent(s): 47a3efb
fix errors

Files changed:
- routes/audio.py  +8 -22
- services/stt_service.py  +33 -28
routes/audio.py
CHANGED
@@ -17,20 +17,16 @@ router = APIRouter(prefix="/audio", tags=["Audio"])
 async def startup_event():
     """Load models when the router starts"""
     logger.info("Loading free STT and Chatbot models...")
-
-
-
+    try:
+        load_stt_model()
+        load_chatbot_model()
+        logger.info("✓ Models loaded successfully")
+    except Exception as e:
+        logger.error(f"✗ Model loading failed: {str(e)}")
 
+# ... rest of your routes remain the same ...
 @router.post("/tts")
 async def tts(request: TTSRequest):
-    """
-    Convert text to speech and return audio file using free gTTS.
-
-    Example:
-    - POST /audio/tts
-    - Body: {"text": "Hello, welcome to our system"}
-    - Returns: MP3 audio file
-    """
     try:
         logger.info(f"TTS request received for text: '{request.text}'")
         audio_bytes = await generate_tts(request.text)
@@ -39,17 +35,8 @@ async def tts(request: TTSRequest):
         logger.error(f"TTS error: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 
-
 @router.post("/stt", response_model=STTResponse)
 async def stt(file: UploadFile = File(...)):
-    """
-    Convert audio file to text using free Whisper model.
-
-    Example:
-    - POST /audio/stt
-    - File: audio.mp3 (or .wav, .m4a)
-    - Returns: {"text": "transcribed text", "model_name": "whisper-small", ...}
-    """
     # Validate file type
     if file.content_type not in ALLOWED_AUDIO_TYPES:
         raise HTTPException(
@@ -72,7 +59,7 @@ async def stt(file: UploadFile = File(...)):
 
         return STTResponse(
             text=text,
-            model_name="whisper-small",
+            model_name="whisper-medium",
             language="en",
             duration_seconds=None
         )
@@ -80,7 +67,6 @@ async def stt(file: UploadFile = File(...)):
         logger.error(f"STT error: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 
-
 @router.post("/chatbot")
 async def chatbot_voice(file: UploadFile = File(...)):
     """
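Note (not part of this commit): startup_event() above is a plain coroutine with no decorator, so it still has to be attached to the application lifecycle somewhere. A minimal sketch of one way to do that, assuming the router is mounted from a separate main.py; that module, the app object, and the import path are assumptions for illustration only:

from fastapi import FastAPI
from routes.audio import router as audio_router, startup_event  # names per this file

app = FastAPI()
app.include_router(audio_router)

# Run the model-loading hook when the application starts
# (add_event_handler is standard Starlette/FastAPI API).
app.add_event_handler("startup", startup_event)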
services/stt_service.py
CHANGED
@@ -1,13 +1,32 @@
-import requests
+import torch
+from transformers import pipeline
 import logging
 import tempfile
 import os
 
 logger = logging.getLogger(__name__)
 
+# Global STT pipeline
+stt_pipeline = None
+
+def load_stt_model():
+    """Load the free Whisper model for speech-to-text"""
+    global stt_pipeline
+    try:
+        logger.info("Loading Whisper-medium STT model...")
+        stt_pipeline = pipeline(
+            "automatic-speech-recognition",
+            model="openai/whisper-medium",
+            device="cpu"
+        )
+        logger.info("✓ Whisper-medium STT model loaded successfully")
+    except Exception as e:
+        logger.error(f"✗ Failed to load Whisper-medium model: {str(e)}")
+        stt_pipeline = None
+
 async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
     """
-    Convert audio bytes to text using
+    Convert audio bytes to text using free Whisper model.
 
     Args:
         audio_bytes: Raw audio file bytes
@@ -16,8 +35,15 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
     Returns:
         Transcribed text
     """
+    global stt_pipeline
+
     try:
-
+        if stt_pipeline is None:
+            load_stt_model()
+            if stt_pipeline is None:
+                raise Exception("STT model failed to load")
+
+        logger.info(f"Converting audio to text using Whisper-medium")
 
         # Save audio bytes to temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
@@ -25,20 +51,10 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
             temp_audio_path = temp_audio.name
 
         try:
-            #
-
-
+            # Transcribe using Whisper
+            result = stt_pipeline(temp_audio_path)
+            transcribed_text = result.get("text", "").strip()
 
-            with open(temp_audio_path, "rb") as f:
-                response = requests.post(API_URL, headers=headers, data=f)
-
-            if response.status_code == 200:
-                result = response.json()
-                transcribed_text = result.get("text", "").strip()
-            else:
-                # Fallback to local model if API fails
-                transcribed_text = await fallback_stt(audio_bytes, filename)
-
             if not transcribed_text:
                 transcribed_text = "Sorry, I couldn't understand the audio."
 
@@ -52,15 +68,4 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
 
     except Exception as e:
         logger.error(f"✗ STT failed: {str(e)}")
-
-
-
-async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
-    """Fallback STT using a simpler approach"""
-    try:
-        # Simple fallback - you could implement a basic speech recognition here
-        # For now, return a placeholder
-        return "Audio received but transcription service is temporarily unavailable."
-    except Exception as e:
-        logger.error(f"Fallback STT also failed: {str(e)}")
-        return "Audio processing failed."
+        raise Exception(f"Speech-to-text conversion failed: {str(e)}")
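Note (not part of this commit): a quick local sanity check of the new Whisper path in services/stt_service.py could look like the sketch below. The sample file name is an assumption; any short WAV clip works, and the first call downloads openai/whisper-medium, which is a multi-gigabyte model.

import asyncio
from services.stt_service import load_stt_model, speech_to_text

async def main():
    # Load the pipeline once up front (speech_to_text would also lazy-load it).
    load_stt_model()
    with open("sample.wav", "rb") as f:  # hypothetical test clip
        audio_bytes = f.read()
    text = await speech_to_text(audio_bytes, "sample.wav")
    print(text)

asyncio.run(main())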