|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import logging
|
|
|
import speech_recognition as sr
|
|
|
from pydub import AudioSegment
|
|
|
from io import BytesIO
|
|
|
|
|
|
# Configure the root logger once at import time: INFO level and
# "timestamp - level - message" formatted records.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
|
|
|
|
|
|
def record_audio(file_path, timeout=5, phrase_time_limit=10):
    """
    Record audio from the microphone and save it as an MP3 file.

    The recognizer is first calibrated against ambient noise for one second,
    then a fixed-length clip of ``phrase_time_limit`` seconds is captured,
    converted from WAV to MP3 (128k bitrate) and written to ``file_path``.

    Any exception raised while recording or exporting is logged together with
    its traceback and then suppressed; the function returns ``None`` in every
    case.

    Args:
        file_path (str): Path to save the recorded audio file.
        timeout (int): Currently unused. The clip is captured with
            ``Recognizer.record(duration=...)``, which is called here without
            any wait-for-speech timeout. The parameter is kept so existing
            callers that pass it keep working.
        phrase_time_limit (int): Length of the recording (in seconds).
    """
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as source:
            logging.info("Adjusting for ambient noise...")
            recognizer.adjust_for_ambient_noise(source, duration=1)
            logging.info("Start speaking now...")

            logging.info("Recording for %s seconds...", phrase_time_limit)
            audio_data = recognizer.record(source, duration=phrase_time_limit)

        # The microphone is released at this point; the conversion below only
        # needs the captured bytes.
        logging.info("Recording complete.")

        # Go through an in-memory WAV buffer so no temporary file is needed.
        wav_data = audio_data.get_wav_data()
        audio_segment = AudioSegment.from_wav(BytesIO(wav_data))
        audio_segment.export(file_path, format="mp3", bitrate="128k")

        logging.info("Audio saved to %s", file_path)

    except Exception as e:
        # Best-effort recording: report the failure (with traceback) instead
        # of propagating it to the caller.
        logging.exception("An error occurred: %s", e)
|
|
|
|
|
|
# Module-level path of the patient's recorded message (MP3).
audio_filepath = "patient_message.mp3"
|
|
|
|
|
|
|
|
|
def transcription(stt_model, audio_filepath, GROQ_API_KEY):
    """
    Transcribe an audio file to English text using Groq's transcription API.

    Args:
        stt_model (str | None): Name of the speech-to-text model to use.
            Falls back to ``"whisper-large-v3-turbo"`` when falsy.
        audio_filepath (str): Path of the audio file to upload; it is opened
            in binary mode and closed again once the request has finished.
        GROQ_API_KEY (str | None): Groq API key. When falsy, the key is read
            from the ``GROQ_API_KEY`` environment variable, after loading any
            ``.env`` file via ``load_dotenv()``.

    Returns:
        The ``text`` attribute of the transcription response.

    Raises:
        ValueError: If no API key was passed in and none is set in the
            environment.
    """
    import os

    from dotenv import load_dotenv
    from groq import Groq

    load_dotenv()

    # Honour the caller's arguments; the environment variable and the default
    # model are only fallbacks (previously both arguments were overwritten).
    api_key = GROQ_API_KEY or os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise ValueError("GROQ_API_KEY is not set! Add it to your environment or .env file.")
    model_name = stt_model or "whisper-large-v3-turbo"

    client = Groq(api_key=api_key)

    # Context manager guarantees the file handle is closed even if the API
    # call raises.
    with open(audio_filepath, "rb") as audio_file:
        result = client.audio.transcriptions.create(
            model=model_name,
            file=audio_file,
            language="en",
        )

    return result.text
|
|
|
|