|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
import gradio as gr
|
|
|
from dotenv import load_dotenv
|
|
|
load_dotenv()
|
|
|
|
|
|
from model import encode_image, analyze_image_with_query, analyze_query
|
|
|
from patient import record_audio, transcription
|
|
|
|
|
|
|
|
|
|
|
|
# Instruction text that process_inputs prepends to the transcribed patient
# query before sending it to the model (with or without an image).
# NOTE(review): the bare `|` lines sit inside the triple-quoted literal and are
# therefore part of the string as written; they look like formatting residue -
# confirm before cleaning them up.
# NOTE(review): the text asks for both "one long paragraph" and "max 2
# sentences" - confirm which of the two is intended.
system_prompt="""You are a professional doctor. Given input is the querry of patient.
|
|
|
What's in this image (if provided)?. Do you find anything wrong with it medically?
|
|
|
Suggest some quick response actions, which can be implemented immediately. Do not add any numbers or special characters in
|
|
|
your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
|
|
|
Donot say 'In the image I see' but say 'With what I see, I think you have ....'
|
|
|
Do end the response with the specialist (ex:urologist, cardiologist) the user should consult and it strictly should be the very last word of the response.
|
|
|
Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
|
|
|
Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""
|
|
|
|
|
|
|
|
|
def process_inputs(audio_filepath, image_filepath=None):
    """Transcribe the patient's recording and obtain the doctor-style reply.

    Args:
        audio_filepath: Path to the recorded audio file. A falsy value
            (``None`` or ``""``) means nothing was recorded.
        image_filepath: Optional path to an image. When given, the image is
            encoded and sent to the model together with the query; otherwise
            a text-only query is sent.

    Returns:
        A ``(speech_to_text_output, doctor_response)`` tuple: the result of
        ``transcription`` and the result of the model call. When no audio was
        supplied, the transcript is ``""`` and the response is a short notice
        asking the user to record again.
    """
    # Without a recording there is nothing to transcribe; answer with a
    # readable notice instead of handing an empty path to the transcriber.
    if not audio_filepath:
        return "", "No audio was received. Please record your question and try again."

    # Single source of truth for the model used by both branches below.
    model = "meta-llama/llama-4-scout-17b-16e-instruct"

    speech_to_text_output = transcription(
        GROQ_API_KEY=os.environ.get("GROQ_API_KEY"),
        audio_filepath=audio_filepath,
        stt_model="whisper-large-v3",
    )

    # Keep a blank line between the instructions and the patient's words so
    # the last word of the prompt does not run into the first word of the
    # transcript.
    query = system_prompt + "\n\n" + speech_to_text_output

    if not image_filepath:
        doctor_response = analyze_query(query=query, model=model)
    else:
        doctor_response = analyze_image_with_query(
            query=query,
            encoded_image=encode_image(image_filepath),
            model=model,
        )

    return speech_to_text_output, doctor_response
|
|
|
|
|
|
|
|
|
|
|
|
# Input widgets, in the order process_inputs receives them:
# a microphone recording (as a file path) and an image (as a file path).
_input_components = [
    gr.Audio(sources=["microphone"], type="filepath"),
    gr.Image(type="filepath"),
]

# Output widgets, in the order process_inputs returns its values:
# the transcript first, then the doctor's reply.
_output_components = [
    gr.Textbox(label="Speech to Text"),
    gr.Textbox(label="Doctor's Response"),
]

# Wire the widgets to process_inputs.
iface = gr.Interface(
    fn=process_inputs,
    inputs=_input_components,
    outputs=_output_components,
    title="AI Doctor with Vision and Voice",
)

# Start the app with Gradio's debug flag enabled.
iface.launch(debug=True)
|
|
|
|
|
|
|