malek-messaoudii committed on
Commit
d4b6133
·
1 Parent(s): 520a06a

Update codes

Browse files
Files changed (2) hide show
  1. routes/audio.py +18 -5
  2. services/stt_service.py +11 -3
routes/audio.py CHANGED
@@ -1,6 +1,7 @@
1
  from fastapi import APIRouter, UploadFile, File, HTTPException
2
  from fastapi.responses import StreamingResponse
3
  import io
 
4
  from services.tts_service import generate_tts
5
  from services.stt_service import speech_to_text
6
 
@@ -11,6 +12,9 @@ router = APIRouter(prefix="/audio", tags=["Audio"])
11
  # ------------------------
12
  @router.post("/tts")
13
  async def tts(text: str):
 
 
 
14
  try:
15
  audio_bytes = await generate_tts(text)
16
  except Exception as e:
@@ -46,25 +50,34 @@ async def stt(file: UploadFile = File(...)):
46
  )
47
 
48
  try:
49
- audio_bytes = await file.read() # Read uploaded file
50
- text = await speech_to_text(audio_bytes) # Call your Gemini STT
51
  except Exception as e:
52
  raise HTTPException(status_code=500, detail=str(e))
53
 
54
  return {"text": text}
55
 
56
 
57
-
58
  # ------------------------
59
  # Voice Chatbot: User sends voice → TTS reply
60
  # ------------------------
61
  @router.post("/chatbot")
62
  async def chatbot(file: UploadFile = File(...)):
 
 
 
 
 
 
 
 
 
 
63
  try:
64
  audio_bytes = await file.read()
65
- user_text = await speech_to_text(audio_bytes)
66
 
67
- # Replace with your NLP logic or chatbot response
68
  response_text = f"You said: {user_text}"
69
 
70
  audio_response = await generate_tts(response_text)
 
1
  from fastapi import APIRouter, UploadFile, File, HTTPException
2
  from fastapi.responses import StreamingResponse
3
  import io
4
+ import mimetypes
5
  from services.tts_service import generate_tts
6
  from services.stt_service import speech_to_text
7
 
 
12
  # ------------------------
13
  @router.post("/tts")
14
  async def tts(text: str):
15
+ """
16
+ Convert text to speech and return audio.
17
+ """
18
  try:
19
  audio_bytes = await generate_tts(text)
20
  except Exception as e:
 
50
  )
51
 
52
  try:
53
+ audio_bytes = await file.read()
54
+ text = await speech_to_text(audio_bytes, file.filename) # Pass filename for correct MIME
55
  except Exception as e:
56
  raise HTTPException(status_code=500, detail=str(e))
57
 
58
  return {"text": text}
59
 
60
 
 
61
  # ------------------------
62
  # Voice Chatbot: User sends voice → TTS reply
63
  # ------------------------
64
  @router.post("/chatbot")
65
  async def chatbot(file: UploadFile = File(...)):
66
+ """
67
+ User sends an audio file, the system converts to text, generates response, and returns TTS audio.
68
+ """
69
+ # Validate MIME type
70
+ if file.content_type not in ALLOWED_AUDIO_TYPES:
71
+ raise HTTPException(
72
+ status_code=400,
73
+ detail=f"Unsupported audio format: {file.content_type}. Supported: WAV, MP3, M4A"
74
+ )
75
+
76
  try:
77
  audio_bytes = await file.read()
78
+ user_text = await speech_to_text(audio_bytes, file.filename)
79
 
80
+ # Replace this with your NLP or chatbot logic
81
  response_text = f"You said: {user_text}"
82
 
83
  audio_response = await generate_tts(response_text)
services/stt_service.py CHANGED
@@ -1,11 +1,19 @@
1
  from services.gemini_client import get_gemini_client
2
  from google.genai import types
 
3
 
4
- async def speech_to_text(audio_bytes: bytes) -> str:
 
 
 
5
  client = get_gemini_client()
6
 
7
- # Wrap audio bytes correctly
8
- audio_file = types.File(data=audio_bytes, mime_type="audio/wav")
 
 
 
 
9
 
10
  response = client.models.generate_content(
11
  model="gemini-2.5-flash",
 
1
  from services.gemini_client import get_gemini_client
2
  from google.genai import types
3
+ import mimetypes # <- Add this
4
 
5
+ async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
6
+ """
7
+ Convert audio bytes to text using Gemini API. Supports WAV, MP3, M4A.
8
+ """
9
  client = get_gemini_client()
10
 
11
+ # Detect MIME type from filename
12
+ mime_type, _ = mimetypes.guess_type(filename)
13
+ if mime_type is None:
14
+ mime_type = "audio/wav" # fallback
15
+
16
+ audio_file = types.File(data=audio_bytes, mime_type=mime_type)
17
 
18
  response = client.models.generate_content(
19
  model="gemini-2.5-flash",