# MajorProj / app.py
# RoneyBABA's picture
# Upload 5 files
# 89ef5a0 verified
# raw
# history blame
# 2.45 kB
# If you don't use pipenv, uncomment the following:
# from dotenv import load_dotenv
# load_dotenv()
# VoiceBot UI with Gradio
import os
import gradio as gr
from dotenv import load_dotenv
load_dotenv()
from model import encode_image, analyze_image_with_query, analyze_query
from patient import record_audio, transcription
#load_dotenv()
# Instruction text that process_inputs prepends to the patient's transcribed
# speech before sending the combined query to the model.
#
# The text below is runtime data, not documentation: every character
# (including the spelling mistakes) is part of what the model receives, so it
# is intentionally left untouched here.
#
# NOTE(review): the prompt asks for "one long paragraph" and also for
# "max 2 sentences" -- confirm which length constraint is actually intended.
# NOTE(review): the prompt forbids numbers and special characters and asks
# that the very last word be the specialist to consult; nothing in this file
# parses that last word, so presumably it is for the reader only -- confirm.
system_prompt="""You are a professional doctor. Given input is the querry of patient.
What's in this image (if provided)?. Do you find anything wrong with it medically?
Suggest some quick response actions, which can be implemented immediately. Do not add any numbers or special characters in
your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
Donot say 'In the image I see' but say 'With what I see, I think you have ....'
Do end the response with the specialist (ex:urologist, cardiologist) the user should consult and it strictly should be the very last word of the response.
Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""
def process_inputs(audio_filepath, image_filepath=None):
    """Transcribe the patient's recording and obtain the doctor's reply.

    The recording is transcribed with ``transcription``; the transcript is
    appended to ``system_prompt`` and sent to ``analyze_query`` (no image) or
    ``analyze_image_with_query`` (image supplied).

    Args:
        audio_filepath: Path of the recorded audio file. May be ``None`` or
            empty when the user submits the form without recording anything.
        image_filepath: Optional path of an image to analyse together with
            the spoken query. Falsy values mean "no image".

    Returns:
        A ``(speech_to_text_output, doctor_response)`` tuple, in the order of
        the two output text boxes of the interface. When no audio was
        provided the transcript is an empty string and the second element is
        a short message asking the user to record their question.
    """
    # Without a recording there is nothing to transcribe; answer with a
    # readable message instead of handing a missing path to the
    # speech-to-text helper.
    if not audio_filepath:
        return "", "No audio was recorded. Please record your question and try again."

    # Both branches below use the same model; name it once.
    llm_model = "meta-llama/llama-4-scout-17b-16e-instruct"

    speech_to_text_output = transcription(
        GROQ_API_KEY=os.environ.get("GROQ_API_KEY"),
        audio_filepath=audio_filepath,
        stt_model="whisper-large-v3",
    )

    # Keep a blank line between the instructions and the patient's words so
    # the last word of the prompt does not fuse with the first word of the
    # transcript.
    query = system_prompt + "\n\n" + speech_to_text_output

    if not image_filepath:
        doctor_response = analyze_query(query=query, model=llm_model)
    else:
        doctor_response = analyze_image_with_query(
            query=query,
            encoded_image=encode_image(image_filepath),
            model=llm_model,
        )

    return speech_to_text_output, doctor_response
# Build the Gradio UI: a microphone recording and an optional image go in,
# the transcript and the doctor's reply come out.
input_components = [
    gr.Audio(sources=["microphone"], type="filepath"),
    gr.Image(type="filepath"),
]
output_components = [
    gr.Textbox(label="Speech to Text"),
    gr.Textbox(label="Doctor's Response"),
]

iface = gr.Interface(
    title="AI Doctor with Vision and Voice",
    fn=process_inputs,
    inputs=input_components,
    outputs=output_components,
)

# Start the web server as soon as this module is executed.
iface.launch(debug=True)
# Local URL noted by the author: http://127.0.0.1:7860