malek-messaoudii committed on
Commit
a8c8142
·
1 Parent(s): 47a3efb

fix errors

Browse files
Files changed (2) hide show
  1. routes/audio.py +8 -22
  2. services/stt_service.py +33 -28
routes/audio.py CHANGED
@@ -17,20 +17,16 @@ router = APIRouter(prefix="/audio", tags=["Audio"])
17
  async def startup_event():
18
  """Load models when the router starts"""
19
  logger.info("Loading free STT and Chatbot models...")
20
- load_stt_model()
21
- load_chatbot_model()
22
-
 
 
 
23
 
 
24
  @router.post("/tts")
25
  async def tts(request: TTSRequest):
26
- """
27
- Convert text to speech and return audio file using free gTTS.
28
-
29
- Example:
30
- - POST /audio/tts
31
- - Body: {"text": "Hello, welcome to our system"}
32
- - Returns: MP3 audio file
33
- """
34
  try:
35
  logger.info(f"TTS request received for text: '{request.text}'")
36
  audio_bytes = await generate_tts(request.text)
@@ -39,17 +35,8 @@ async def tts(request: TTSRequest):
39
  logger.error(f"TTS error: {str(e)}")
40
  raise HTTPException(status_code=500, detail=str(e))
41
 
42
-
43
  @router.post("/stt", response_model=STTResponse)
44
  async def stt(file: UploadFile = File(...)):
45
- """
46
- Convert audio file to text using free Whisper model.
47
-
48
- Example:
49
- - POST /audio/stt
50
- - File: audio.mp3 (or .wav, .m4a)
51
- - Returns: {"text": "transcribed text", "model_name": "whisper-small", ...}
52
- """
53
  # Validate file type
54
  if file.content_type not in ALLOWED_AUDIO_TYPES:
55
  raise HTTPException(
@@ -72,7 +59,7 @@ async def stt(file: UploadFile = File(...)):
72
 
73
  return STTResponse(
74
  text=text,
75
- model_name="whisper-small",
76
  language="en",
77
  duration_seconds=None
78
  )
@@ -80,7 +67,6 @@ async def stt(file: UploadFile = File(...)):
80
  logger.error(f"STT error: {str(e)}")
81
  raise HTTPException(status_code=500, detail=str(e))
82
 
83
-
84
  @router.post("/chatbot")
85
  async def chatbot_voice(file: UploadFile = File(...)):
86
  """
 
17
async def startup_event():
    """Load the STT and chatbot models when the router starts.

    Failures are logged but deliberately not re-raised, so the API can
    still serve routes that do not depend on the models.
    """
    logger.info("Loading free STT and Chatbot models...")
    try:
        load_stt_model()
        load_chatbot_model()
        logger.info("✓ Models loaded successfully")
    except Exception as e:
        # logger.exception records the full traceback; a bare error
        # message loses the context needed to debug a failed model load.
        logger.exception(f"✗ Model loading failed: {str(e)}")
26
 
27
+ # ... rest of your routes remain the same ...
28
  @router.post("/tts")
29
  async def tts(request: TTSRequest):
 
 
 
 
 
 
 
 
30
  try:
31
  logger.info(f"TTS request received for text: '{request.text}'")
32
  audio_bytes = await generate_tts(request.text)
 
35
  logger.error(f"TTS error: {str(e)}")
36
  raise HTTPException(status_code=500, detail=str(e))
37
 
 
38
  @router.post("/stt", response_model=STTResponse)
39
  async def stt(file: UploadFile = File(...)):
 
 
 
 
 
 
 
 
40
  # Validate file type
41
  if file.content_type not in ALLOWED_AUDIO_TYPES:
42
  raise HTTPException(
 
59
 
60
  return STTResponse(
61
  text=text,
62
+ model_name="whisper-medium",
63
  language="en",
64
  duration_seconds=None
65
  )
 
67
  logger.error(f"STT error: {str(e)}")
68
  raise HTTPException(status_code=500, detail=str(e))
69
 
 
70
  @router.post("/chatbot")
71
  async def chatbot_voice(file: UploadFile = File(...)):
72
  """
services/stt_service.py CHANGED
@@ -1,13 +1,32 @@
1
- import requests
 
2
  import logging
3
  import tempfile
4
  import os
5
 
6
  logger = logging.getLogger(__name__)
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
9
  """
10
- Convert audio bytes to text using Hugging Face Inference API (free).
11
 
12
  Args:
13
  audio_bytes: Raw audio file bytes
@@ -16,8 +35,15 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
16
  Returns:
17
  Transcribed text
18
  """
 
 
19
  try:
20
- logger.info(f"Converting audio to text using Hugging Face API")
 
 
 
 
 
21
 
22
  # Save audio bytes to temporary file
23
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
@@ -25,20 +51,10 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
25
  temp_audio_path = temp_audio.name
26
 
27
  try:
28
- # Use Hugging Face Inference API (free)
29
- API_URL = "https://api-inference.huggingface.co/models/openai/whisper-medium"
30
- headers = {"Authorization": "Bearer YOUR_HF_TOKEN"} # Optional for free tier
31
 
32
- with open(temp_audio_path, "rb") as f:
33
- response = requests.post(API_URL, headers=headers, data=f)
34
-
35
- if response.status_code == 200:
36
- result = response.json()
37
- transcribed_text = result.get("text", "").strip()
38
- else:
39
- # Fallback to local model if API fails
40
- transcribed_text = await fallback_stt(audio_bytes, filename)
41
-
42
  if not transcribed_text:
43
  transcribed_text = "Sorry, I couldn't understand the audio."
44
 
@@ -52,15 +68,4 @@ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
52
 
53
  except Exception as e:
54
  logger.error(f"✗ STT failed: {str(e)}")
55
- return "Sorry, there was an error processing your audio."
56
-
57
-
58
- async def fallback_stt(audio_bytes: bytes, filename: str) -> str:
59
- """Fallback STT using a simpler approach"""
60
- try:
61
- # Simple fallback - you could implement a basic speech recognition here
62
- # For now, return a placeholder
63
- return "Audio received but transcription service is temporarily unavailable."
64
- except Exception as e:
65
- logger.error(f"Fallback STT also failed: {str(e)}")
66
- return "Audio processing failed."
 
1
+ import torch
2
+ from transformers import pipeline
3
  import logging
4
  import tempfile
5
  import os
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
# Module-level ASR pipeline, shared by the STT helpers in this file.
stt_pipeline = None


def load_stt_model():
    """Initialise the global Whisper-medium speech-to-text pipeline.

    On any failure the pipeline is reset to None and the error is
    logged; callers are expected to check for None before using it.
    """
    global stt_pipeline
    try:
        logger.info("Loading Whisper-medium STT model...")
        stt_pipeline = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-medium",
            # NOTE(review): pinned to CPU — presumably for free-tier
            # hosting without a GPU; confirm before changing.
            device="cpu",
        )
    except Exception as e:
        logger.error(f"✗ Failed to load Whisper-medium model: {str(e)}")
        stt_pipeline = None
    else:
        logger.info("✓ Whisper-medium STT model loaded successfully")
27
async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """
    Convert audio bytes to text using the free Whisper model.

    Args:
        audio_bytes: Raw audio file bytes
        filename: Original filename (kept for diagnostics)

    Returns:
        Transcribed text

    Raises:
        Exception: if the model cannot be loaded or transcription fails.
    """
    global stt_pipeline

    try:
        # Lazily (re)load the model in case the startup hook did not run
        # or a previous load attempt failed.
        if stt_pipeline is None:
            load_stt_model()
            if stt_pipeline is None:
                raise Exception("STT model failed to load")

        logger.info(f"Converting audio to text using Whisper-medium")

        # The pipeline reads from a file path, so persist the bytes to a
        # temporary file first (delete=False: the pipeline opens it after
        # this context manager closes the handle).
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_audio:
            temp_audio.write(audio_bytes)
            temp_audio_path = temp_audio.name

        try:
            # Transcribe using Whisper
            result = stt_pipeline(temp_audio_path)
            transcribed_text = result.get("text", "").strip()

            if not transcribed_text:
                transcribed_text = "Sorry, I couldn't understand the audio."

            return transcribed_text
        finally:
            # Always remove the temp file, even when transcription fails.
            # NOTE(review): the cleanup/return lines were elided in the
            # diff view — confirm against the full file.
            if os.path.exists(temp_audio_path):
                os.unlink(temp_audio_path)

    except Exception as e:
        logger.error(f"✗ STT failed: {str(e)}")
        # Chain the original exception so the root cause survives the
        # re-raise (the visible original dropped it).
        raise Exception(f"Speech-to-text conversion failed: {str(e)}") from e