Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Running

malek-messaoudii committed on 24 days ago

Commit

91b1985

1 Parent(s): 73d4f3c

update env

Files changed (4) hide show

requirements.txt CHANGED Viewed

@@ -8,3 +8,4 @@ accelerate>=0.24.0
 protobuf>=3.20.0
 huggingface_hub>=0.19.0
 python-multipart

 protobuf>=3.20.0
 huggingface_hub>=0.19.0
 python-multipart
+google-genai>=0.4.0

services/gemini_client.py CHANGED Viewed

@@ -1,3 +1,9 @@
-from google import genai
-client = genai.Client()

+from google.genai import Client, types
+import os
+def get_gemini_client() -> Client:
+    """
+    Returns a singleton Gemini AI client.
+    Can be extended to read API key from environment variables.
+    """
+    return Client()

services/stt_service.py CHANGED Viewed

@@ -1,18 +1,9 @@
 from services.gemini_client import get_gemini_client
 async def speech_to_text(audio_bytes: bytes) -> str:
     client = get_gemini_client()
     response = client.models.generate_content(
         model="gemini-2.5-flash",
-        contents=[
-            {
-                "mime_type": "audio/wav",
-                "data": audio_bytes
-            }
-        ]
     )
-    text = response.text
-    return text

 from services.gemini_client import get_gemini_client
 async def speech_to_text(audio_bytes: bytes) -> str:
     client = get_gemini_client()
     response = client.models.generate_content(
         model="gemini-2.5-flash",
+        contents=[{"mime_type": "audio/wav", "data": audio_bytes}],
     )
+    return response.text

services/tts_service.py CHANGED Viewed

@@ -1,19 +1,8 @@
-from google.genai import types
 from services.gemini_client import get_gemini_client
-import wave
-def save_wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
-    with wave.open(filename, "wb") as wf:
-        wf.setnchannels(channels)
-        wf.setsampwidth(sample_width)
-        wf.setframerate(rate)
-        wf.writeframes(pcm)
 async def generate_tts(text: str) -> bytes:
     client = get_gemini_client()
     response = client.models.generate_content(
         model="gemini-2.5-flash-preview-tts",
         contents=text,
@@ -21,13 +10,9 @@ async def generate_tts(text: str) -> bytes:
             response_modalities=["AUDIO"],
             speech_config=types.SpeechConfig(
                 voice_config=types.VoiceConfig(
-                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
-                        voice_name="Kore"
-                    )
                 )
             ),
         ),
     )
-    audio_bytes = response.candidates[0].content.parts[0].inline_data.data
-    return audio_bytes

 from services.gemini_client import get_gemini_client
+from google.genai import types
 async def generate_tts(text: str) -> bytes:
     client = get_gemini_client()
     response = client.models.generate_content(
         model="gemini-2.5-flash-preview-tts",
         contents=text,
             response_modalities=["AUDIO"],
             speech_config=types.SpeechConfig(
                 voice_config=types.VoiceConfig(
+                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Kore")
                 )
             ),
         ),
     )
+    return response.candidates[0].content.parts[0].inline_data.data