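"""Brain MRI question-answering demo.

Gradio app that runs MedVLM-R1 (a Qwen2-VL model) on an uploaded scan,
then sends the raw <think>/<answer> output to DeepSeek R1 via the
OpenRouter API to produce a structured, clinician-friendly summary.
"""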
import os

import gradio as gr
import requests
import torch
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, GenerationConfig
from qwen_vl_utils import process_vision_info

# ----------------------------
# MODEL LOADING (MedVLM-R1)
# ----------------------------
MODEL_PATH = 'JZPeterPan/MedVLM-R1'

model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(MODEL_PATH)

# Greedy decoding; pad_token_id 151643 is Qwen2's <|endoftext|> token.
temp_generation_config = GenerationConfig(
    max_new_tokens=1024,
    do_sample=False,
    num_return_sequences=1,
    pad_token_id=151643,
)

# ----------------------------
# API SETTINGS (DeepSeek R1)
# ----------------------------
# Never hard-code secrets: read the OpenRouter API key from the environment.
api_key = os.environ.get("OPENROUTER_API_KEY", "")
deepseek_model = "deepseek/deepseek-r1"

# ----------------------------
# DEFAULT QUESTION
# ----------------------------
DEFAULT_QUESTION = "What abnormality is in the brain MRI and what is the location?\nA) Tumour\nB) No tumour"

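# Prompt template in the <think>/<answer> format that MedVLM-R1 expects.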
QUESTION_TEMPLATE = """
{Question}
Your task: 
1. Think through the question step by step, enclose your reasoning process in <think>...</think> tags. 
2. Then provide the correct single-letter choice (A, B, C, D,...) inside <answer>...</answer> tags.
3. No extra information or text outside of these tags.
"""

# ----------------------------
# PIPELINE FUNCTION
# ----------------------------
def process_pipeline(image, user_question):
    if image is None or user_question.strip() == "":
        return "Please upload an image and enter a question."
    if not api_key:
        return "OPENROUTER_API_KEY is not set; cannot call DeepSeek R1."

    # Combine user's question with default
    combined_question = user_question.strip() + "\n\n" + DEFAULT_QUESTION

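    # Qwen2-VL chat format: one image plus the templated question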
    message = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": QUESTION_TEMPLATE.format(Question=combined_question)}
        ]
    }]

    # Prepare inputs for MedVLM
    text = processor.apply_chat_template(message, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(message)

    inputs = processor(
        text=text,
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)  # respect device_map="auto" rather than hard-coding "cuda"

    # Generate output from MedVLM; decoding settings come from temp_generation_config
    generated_ids = model.generate(
        **inputs,
        use_cache=True,
        generation_config=temp_generation_config,
    )

    # Strip the prompt tokens so only the newly generated text is decoded
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )[0]

    # Have DeepSeek R1 restructure MedVLM's raw answer for clinicians
    prompt = f"""The following is a medical AI's answer to a visual question.
The answer concerns whether a tumour is present; focus mainly on that.
Keep the answer precise but better structured and useful for a medical professional.
If possible, present the details of the original answer in a table.

Original Answer:
{output_text}
"""

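    # OpenRouter exposes an OpenAI-compatible chat completions endpoint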
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    data = {
        "model": deepseek_model,
        "messages": [
            {"role": "system", "content": "You are a highly skilled medical writer."},
            {"role": "user", "content": prompt}
        ]
    }

    response = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers=headers,
        json=data,
        timeout=120,  # don't hang the UI indefinitely if the API stalls
    )

    try:
        detailed_answer = response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error from DeepSeek: {str(e)}\nFull Response: {response.text}"

    return detailed_answer

# ----------------------------
# GRADIO UI
# ----------------------------
with gr.Blocks(title="Brain MRI QA") as demo:
    with gr.Row():
        # Left column
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload Medical Image")
        
        # Right column
        with gr.Column():
            question_box = gr.Textbox(label="Your Question about the Image", placeholder="Type your question here...")
            submit_btn = gr.Button("Submit")
            clear_btn = gr.Button("Clear")
            llm_output = gr.Textbox(label="Detailed LLM Answer", interactive=False, lines=10)

    submit_btn.click(
        fn=process_pipeline,
        inputs=[image_input, question_box],
        outputs=llm_output
    )
    clear_btn.click(
        fn=lambda: (None, "", ""),
        outputs=[image_input, question_box, llm_output]
    )

if __name__ == "__main__":
    demo.launch()
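    # Tip: demo.launch(share=True) also creates a temporary public Gradio link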