File size: 1,232 Bytes
209f7bb
 
4a13628
587fe4f
c45f0d6
a71355d
c45f0d6
587fe4f
 
 
c45f0d6
 
587fe4f
 
 
c45f0d6
 
1e7709f
a71355d
1e7709f
 
c45f0d6
a71355d
c45f0d6
 
1e7709f
c45f0d6
 
a71355d
c45f0d6
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import requests
from config import GROQ_API_KEY, GROQ_STT_MODEL

def speech_to_text(audio_file: str) -> str:
    """
    Convert an audio file to text using Groq's Whisper API (English only).

    The file is uploaded as multipart form data to Groq's OpenAI-compatible
    transcription endpoint, and the ``text`` field of the JSON response is
    returned.

    Args:
        audio_file: Path to the audio file to transcribe. It is always
            uploaded with the ``audio/wav`` content type.

    Returns:
        The value of the ``text`` field in the response, or an empty string
        when the response has no such field.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is empty or unset.
        OSError: If ``audio_file`` cannot be opened for reading.
        Exception: If the HTTP request fails or the response cannot be
            processed. The underlying error is chained as ``__cause__``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import os

    if not GROQ_API_KEY:
        raise RuntimeError("GROQ_API_KEY is not set in config")

    url = "https://api.groq.com/openai/v1/audio/transcriptions"

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}"
    }

    data = {
        "model": GROQ_STT_MODEL,
        "language": "en",  # Force English
        "temperature": 0,
        "response_format": "json"
    }

    # Send only the base name as the multipart filename: the extension is
    # preserved, but local directory structure is not leaked to the API.
    upload_name = os.path.basename(audio_file)

    with open(audio_file, "rb") as audio_data:
        files = {
            "file": (upload_name, audio_data, "audio/wav")
        }

        try:
            response = requests.post(url, headers=headers, files=files, data=data, timeout=30)
            response.raise_for_status()

            result = response.json()
            return result.get("text", "")

        except requests.exceptions.RequestException as e:
            # Chain the cause so the original traceback survives the re-raise.
            raise Exception(f"Groq STT API error: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Unexpected error in speech_to_text: {str(e)}") from e