# Spaces: Runtime error  (page-status text captured together with this source; not part of the program)
import os

import gradio as gr
import requests
import torch
from IPython.display import Markdown
from qwen_vl_utils import process_vision_info
from transformers import AutoProcessor, GenerationConfig, Qwen2VLForConditionalGeneration
# ----------------------------
# MODEL LOADING (MedVLM-R1)
# ----------------------------
# The checkpoint and its processor are loaded once at import time and shared
# by every request handled by process_pipeline().
MODEL_PATH = 'JZPeterPan/MedVLM-R1'

model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # let the library choose device placement
)
processor = AutoProcessor.from_pretrained(MODEL_PATH)

# Decoding settings passed to model.generate() for every request.
temp_generation_config = GenerationConfig(
    max_new_tokens=1024,
    do_sample=False,  # non-sampling decoding: same input -> same output
    temperature=1,
    num_return_sequences=1,
    # NOTE(review): presumably the tokenizer's padding token id for this
    # checkpoint -- confirm against processor.tokenizer.pad_token_id.
    pad_token_id=151643,
)
# ----------------------------
# API SETTINGS (DeepSeek R1)
# ----------------------------
# The OpenRouter key is read from the environment so that no secret lives in
# the source. Any key that has ever been committed or published must be
# treated as compromised and rotated. When the variable is unset the key is
# empty and the API call in process_pipeline() reports the resulting error.
api_key = os.environ.get("OPENROUTER_API_KEY", "")

# Model identifier sent in the "model" field of the chat-completions request.
deepseek_model = "deepseek/deepseek-r1"
# ----------------------------
# DEFAULT QUESTION
# ----------------------------
# Multiple-choice question that process_pipeline() appends to whatever the
# user typed.
DEFAULT_QUESTION = "What abnormality is in the brain MRI and what is the location?\nA) Tumour\nB) No tumour"

# Prompt wrapper for the vision-language model. "{Question}" is filled in with
# str.format(); the remaining text asks for reasoning inside <think> tags and
# a single-letter choice inside <answer> tags.
QUESTION_TEMPLATE = """
{Question}
Your task:
1. Think through the question step by step, enclose your reasoning process in <think>...</think> tags.
2. Then provide the correct single-letter choice (A, B, C, D,...) inside <answer>...</answer> tags.
3. No extra information or text outside of these tags.
"""
# ----------------------------
# PIPELINE FUNCTION
# ----------------------------
# Upper bound, in seconds, on the OpenRouter round trip so that a stalled
# connection cannot block the UI handler indefinitely.
_DEEPSEEK_TIMEOUT_S = 120

_MISSING_INPUT_MESSAGE = "Please upload an image and enter a question."


def _run_medvlm(image, question):
    """Ask the vision-language model ``question`` about ``image`` and return its decoded reply."""
    message = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": QUESTION_TEMPLATE.format(Question=question)},
        ],
    }]

    text = processor.apply_chat_template(message, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(message)
    inputs = processor(
        text=text,
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)  # follow the model's placement instead of assuming CUDA

    # Decoding limits (max_new_tokens, do_sample, ...) come from
    # temp_generation_config, so they are not repeated here.
    generated_ids = model.generate(
        **inputs,
        use_cache=True,
        generation_config=temp_generation_config,
    )

    # Each output sequence starts with its input tokens; keep only what
    # follows them.
    completions = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    return processor.batch_decode(
        completions,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]


def _refine_with_deepseek(medvlm_answer):
    """Send ``medvlm_answer`` to DeepSeek R1 via OpenRouter and return the rewritten text or an error string."""
    prompt = (
        "The following is a medical AI's answer to a visual question.\n"
        "The answer is about having tumour or not, focus on that mostly.\n"
        "Keep the answer precise but more structured, and helpful for a medical professional.\n"
        "If possible, make a table using the details from the original answer.\n"
        "Original Answer:\n"
        f"{medvlm_answer}\n"
    )
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": deepseek_model,
        "messages": [
            {"role": "system", "content": "You are a highly skilled medical writer."},
            {"role": "user", "content": prompt},
        ],
    }

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=_DEEPSEEK_TIMEOUT_S,
        )
    except requests.RequestException as e:
        # Connection failures and timeouts are reported in the output box
        # rather than crashing the handler.
        return f"Error contacting DeepSeek: {str(e)}"

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # Non-JSON body or a JSON body without the expected
        # choices[0].message.content path (e.g. an API error payload).
        return f"Error from DeepSeek: {str(e)}\nFull Response: {response.text}"


def process_pipeline(image, user_question):
    """Answer a question about a medical image and return a polished write-up.

    The user's question is combined with ``DEFAULT_QUESTION``, answered by the
    MedVLM-R1 model, and that answer is then rewritten by DeepSeek R1 through
    the OpenRouter chat-completions endpoint.

    Args:
        image: Image to analyse, as supplied by the UI image component
            (configured with ``type="filepath"``); ``None`` when nothing has
            been uploaded.
        user_question: Free-text question. ``None``, empty, or
            whitespace-only values are treated as missing.

    Returns:
        The rewritten answer, or a human-readable message when an input is
        missing or the DeepSeek request fails. The function always returns a
        string for these cases so the result can be shown in the output box.
    """
    question = (user_question or "").strip()
    if image is None or not question:
        return _MISSING_INPUT_MESSAGE

    # Combine user's question with default
    combined_question = question + "\n\n" + DEFAULT_QUESTION

    output_text = _run_medvlm(image, combined_question)
    return _refine_with_deepseek(output_text)
# ----------------------------
# GRADIO UI
# ----------------------------
# Two-column layout: image upload on the left; question, buttons and the
# answer box on the right. Event handlers are registered inside the Blocks
# context so they are attached to this app.
with gr.Blocks(title="Brain MRI QA") as demo:
    with gr.Row():
        # Left column
        with gr.Column():
            # type="filepath": the handler receives a path string, not pixels.
            image_input = gr.Image(type="filepath", label="Upload Medical Image")
        # Right column
        with gr.Column():
            question_box = gr.Textbox(
                label="Your Question about the Image",
                placeholder="Type your question here...",
            )
            submit_btn = gr.Button("Submit")
            clear_btn = gr.Button("Clear")
            llm_output = gr.Textbox(label="Detailed LLM Answer", interactive=False, lines=10)

    # Submit runs the full MedVLM -> DeepSeek pipeline and shows its result.
    submit_btn.click(
        fn=process_pipeline,
        inputs=[image_input, question_box],
        outputs=llm_output,
    )
    # Clear resets the question and the answer; the uploaded image is kept.
    clear_btn.click(
        fn=lambda: ("", ""),
        outputs=[question_box, llm_output],
    )
# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()