# If you don't use pipenv, uncomment the following:
# from dotenv import load_dotenv
# load_dotenv()

# Step 1: Set up the audio recorder (requires ffmpeg, portaudio and pyaudio).
import logging
from io import BytesIO

import speech_recognition as sr
from pydub import AudioSegment

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def record_audio(file_path, timeout=5, phrase_time_limit=10):
    """
    Record audio from the default microphone and save it as an MP3 file.

    The recording runs for a fixed ``phrase_time_limit`` seconds after a
    one-second ambient-noise calibration. Any exception raised while
    recording or exporting is logged and swallowed, so the function always
    returns ``None``.

    Args:
        file_path (str): Path to save the recorded audio file.
        timeout (int): Maximum time to wait for a phrase to start (in seconds).
            Currently unused, because recording is duration-based; the
            parameter is kept so existing callers keep working.
        phrase_time_limit (int): Duration of the recording (in seconds).
    """
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as source:
            logging.info("Adjusting for ambient noise...")
            recognizer.adjust_for_ambient_noise(source, duration=1)
            logging.info("Start speaking now...")

            # Fixed-length capture: record() stops after `duration` seconds.
            logging.info("Recording for %s seconds...", phrase_time_limit)
            audio_data = recognizer.record(source, duration=phrase_time_limit)
            logging.info("Recording complete.")

            # The recognizer yields WAV bytes; re-encode them to MP3 via pydub.
            wav_data = audio_data.get_wav_data()
            audio_segment = AudioSegment.from_wav(BytesIO(wav_data))
            audio_segment.export(file_path, format="mp3", bitrate="128k")

            logging.info("Audio saved to %s", file_path)

    except Exception as e:
        # Best-effort by design: report the failure instead of crashing.
        logging.error(f"An error occurred: {e}")


audio_filepath = "patient_message.mp3"


# Step 2: Set up the speech-to-text (STT) model for transcription.
def transcription(stt_model, audio_filepath, GROQ_API_KEY):
    """
    Transcribe an English audio file to text with Groq's transcription API.

    Args:
        stt_model (str): Name of the speech-to-text model. When falsy,
            "whisper-large-v3-turbo" is used.
        audio_filepath (str): Path of the audio file to transcribe.
        GROQ_API_KEY (str): Groq API key. When falsy, the ``GROQ_API_KEY``
            environment variable (after loading ``.env``) is used instead.

    Returns:
        The ``text`` attribute of the transcription response.

    Raises:
        ValueError: If no API key is supplied and none is found in the
            environment.
    """
    import os

    from dotenv import load_dotenv
    from groq import Groq

    load_dotenv()

    # Honour the caller's arguments; fall back to the environment / default
    # only when they are not provided.
    api_key = GROQ_API_KEY or os.environ.get("GROQ_API_KEY")
    model_name = stt_model or "whisper-large-v3-turbo"

    if api_key is None:
        raise ValueError("GROQ_API_KEY is not set! Add it to your environment or .env file.")

    client = Groq(api_key=api_key)

    # Context manager guarantees the file handle is closed after the upload.
    with open(audio_filepath, "rb") as audio_file:
        result = client.audio.transcriptions.create(
            model=model_name,
            file=audio_file,
            language="en",
        )

    return result.text