import gradio as gr
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, GenerationConfig
from qwen_vl_utils import process_vision_info
import torch
import requests
import os  # read the API key from the environment instead of hardcoding it
# ----------------------------
# MODEL LOADING (MedVLM-R1)
# ----------------------------
MODEL_PATH = 'JZPeterPan/MedVLM-R1'
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(MODEL_PATH)
# Greedy decoding: sampling options such as temperature are irrelevant when do_sample=False.
temp_generation_config = GenerationConfig(
    max_new_tokens=1024,
    do_sample=False,
    num_return_sequences=1,
    pad_token_id=151643,  # the Qwen2 tokenizer's <|endoftext|> id
)
# ----------------------------
# API SETTINGS (DeepSeek R1)
# ----------------------------
# Never hardcode secrets; set the key before launching, e.g. export OPENROUTER_API_KEY=sk-...
api_key = os.environ.get("OPENROUTER_API_KEY")
deepseek_model = "deepseek/deepseek-r1"
# ----------------------------
# DEFAULT QUESTION
# ----------------------------
DEFAULT_QUESTION = "What abnormality is present in the brain MRI, and where is it located?\nA) Tumour\nB) No tumour"
QUESTION_TEMPLATE = """
{Question}
Your task:
1. Think through the question step by step, enclose your reasoning process in <think>...</think> tags.
2. Then provide the correct single-letter choice (A, B, C, D,...) inside <answer>...</answer> tags.
3. No extra information or text outside of these tags.
"""
# ----------------------------
# PIPELINE FUNCTION
# ----------------------------
def process_pipeline(image, user_question):
    if image is None or user_question.strip() == "":
        return "Please upload an image and enter a question."
    # Combine the user's question with the default multiple-choice question
    combined_question = user_question.strip() + "\n\n" + DEFAULT_QUESTION
    message = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": QUESTION_TEMPLATE.format(Question=combined_question)}
        ]
    }]
    # Prepare inputs for MedVLM
    text = processor.apply_chat_template(message, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(message)
    inputs = processor(
        text=text,
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)  # follow the model's device rather than assuming "cuda"
    # Generate output from MedVLM (max_new_tokens and do_sample already live in the config)
    generated_ids = model.generate(
        **inputs,
        use_cache=True,
        generation_config=temp_generation_config,
    )
    # Strip the prompt tokens so only the newly generated text remains
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
    # Send MedVLM's output to DeepSeek R1 for restructuring
    prompt = f"""The following is a medical AI's answer to a visual question.
The answer concerns whether a tumour is present, so focus mainly on that.
Keep the answer precise but better structured, and useful to a medical professional.
If possible, summarise the details from the original answer in a table.
Original Answer:
{output_text}
"""
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": deepseek_model,
        "messages": [
            {"role": "system", "content": "You are a highly skilled medical writer."},
            {"role": "user", "content": prompt},
        ],
    }
    response = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers=headers,
        json=data,
        timeout=120,  # avoid hanging the UI if the API stalls
    )
    try:
        detailed_answer = response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error from DeepSeek: {e}\nFull Response: {response.text}"
    return detailed_answer
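# Optional helper (a minimal sketch, not used by the UI below): pull the
# single-letter choice out of MedVLM's <answer>...</answer> tags, assuming the
# model follows QUESTION_TEMPLATE. `extract_answer_letter` is a hypothetical
# name introduced here for illustration.
import re  # would normally sit with the imports at the top

def extract_answer_letter(medvlm_output):
    """Return the letter inside <answer>...</answer>, or None if absent."""
    match = re.search(r"<answer>\s*([A-Za-z])\s*</answer>", medvlm_output)
    return match.group(1).upper() if match else None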
# ----------------------------
# GRADIO UI
# ----------------------------
with gr.Blocks(title="Brain MRI QA") as demo:
    with gr.Row():
        # Left column: image upload
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload Medical Image")
        # Right column: question, controls, and output
        with gr.Column():
            question_box = gr.Textbox(label="Your Question about the Image", placeholder="Type your question here...")
            submit_btn = gr.Button("Submit")
            clear_btn = gr.Button("Clear")
            llm_output = gr.Textbox(label="Detailed LLM Answer", interactive=False, lines=10)
    submit_btn.click(
        fn=process_pipeline,
        inputs=[image_input, question_box],
        outputs=llm_output,
    )
    clear_btn.click(
        fn=lambda: ("", ""),
        outputs=[question_box, llm_output],
    )
if __name__ == "__main__":
    demo.launch()
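# A quick way to smoke-test the pipeline without the UI (a sketch; the image
# path is a placeholder you would replace with a real file):
#
#     answer = process_pipeline("sample_mri.png", "Is there a mass lesion?")
#     print(answer)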