malek-messaoudii committed on
Commit
91b1985
·
1 Parent(s): 73d4f3c

update env

Browse files
requirements.txt CHANGED
@@ -8,3 +8,4 @@ accelerate>=0.24.0
8
  protobuf>=3.20.0
9
  huggingface_hub>=0.19.0
10
  python-multipart
 
 
8
  protobuf>=3.20.0
9
  huggingface_hub>=0.19.0
10
  python-multipart
11
+ google-genai>=0.4.0
services/gemini_client.py CHANGED
@@ -1,3 +1,9 @@
1
- from google import genai
 
2
 
3
- client = genai.Client()
 
 
 
 
 
 
1
+ from google.genai import Client, types
2
+ import os
3
 
4
+ def get_gemini_client() -> Client:
5
+ """
6
+ Returns a singleton Gemini AI client.
7
+ Can be extended to read API key from environment variables.
8
+ """
9
+ return Client()
services/stt_service.py CHANGED
@@ -1,18 +1,9 @@
1
  from services.gemini_client import get_gemini_client
2
 
3
-
4
  async def speech_to_text(audio_bytes: bytes) -> str:
5
  client = get_gemini_client()
6
-
7
  response = client.models.generate_content(
8
  model="gemini-2.5-flash",
9
- contents=[
10
- {
11
- "mime_type": "audio/wav",
12
- "data": audio_bytes
13
- }
14
- ]
15
  )
16
-
17
- text = response.text
18
- return text
 
1
  from services.gemini_client import get_gemini_client
2
 
 
3
  async def speech_to_text(audio_bytes: bytes) -> str:
4
  client = get_gemini_client()
 
5
  response = client.models.generate_content(
6
  model="gemini-2.5-flash",
7
+ contents=[{"mime_type": "audio/wav", "data": audio_bytes}],
 
 
 
 
 
8
  )
9
+ return response.text
 
 
services/tts_service.py CHANGED
@@ -1,19 +1,8 @@
1
- from google.genai import types
2
  from services.gemini_client import get_gemini_client
3
- import wave
4
-
5
-
6
- def save_wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
7
- with wave.open(filename, "wb") as wf:
8
- wf.setnchannels(channels)
9
- wf.setsampwidth(sample_width)
10
- wf.setframerate(rate)
11
- wf.writeframes(pcm)
12
-
13
 
14
  async def generate_tts(text: str) -> bytes:
15
  client = get_gemini_client()
16
-
17
  response = client.models.generate_content(
18
  model="gemini-2.5-flash-preview-tts",
19
  contents=text,
@@ -21,13 +10,9 @@ async def generate_tts(text: str) -> bytes:
21
  response_modalities=["AUDIO"],
22
  speech_config=types.SpeechConfig(
23
  voice_config=types.VoiceConfig(
24
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
25
- voice_name="Kore"
26
- )
27
  )
28
  ),
29
  ),
30
  )
31
-
32
- audio_bytes = response.candidates[0].content.parts[0].inline_data.data
33
- return audio_bytes
 
 
1
  from services.gemini_client import get_gemini_client
2
+ from google.genai import types
 
 
 
 
 
 
 
 
 
3
 
4
  async def generate_tts(text: str) -> bytes:
5
  client = get_gemini_client()
 
6
  response = client.models.generate_content(
7
  model="gemini-2.5-flash-preview-tts",
8
  contents=text,
 
10
  response_modalities=["AUDIO"],
11
  speech_config=types.SpeechConfig(
12
  voice_config=types.VoiceConfig(
13
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Kore")
 
 
14
  )
15
  ),
16
  ),
17
  )
18
+ return response.candidates[0].content.parts[0].inline_data.data