Spaces:
Runtime error
Runtime error
| # Copyright 2024 Ronan Le Meillat | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # Import necessary libraries | |
| import gradio as gr | |
| from transformers import AutoProcessor, Idefics3ForConditionalGeneration, image_utils | |
| import torch | |
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")  # Log the device being used

# Identifiers of the adapter and of the base checkpoint it is loaded onto
model_id = "eltorio/IDEFICS3_ROCO"
base_model_path = "HuggingFaceM4/Idefics3-8B-Llama3"  # or change to local path

# The processor is created from the base checkpoint, not from the adapter repo
processor = AutoProcessor.from_pretrained(base_model_path, trust_remote_code=True)

# Load the base model with bfloat16 weights, then move it to the chosen device
model = Idefics3ForConditionalGeneration.from_pretrained(
    base_model_path,
    torch_dtype=torch.bfloat16,
)
model = model.to(device)

# Load the adapter identified by model_id into the already-loaded base model
model.load_adapter(model_id, device_map="auto")
# Define a function to infer a description from an image
def infere(image):
    """
    Generate a description of a medical image.

    Args:
    - image (PIL Image): The medical image to describe.
      NOTE(review): the interface registers this function with inputs="image";
      confirm which concrete image type Gradio actually passes in.

    Returns:
    - generated_texts (List[str]): A list containing the generated description.

    Raises:
    - ValueError: If no image was supplied (image is None).
    """
    # Fail early with a clear message instead of letting the processor choke on
    # a missing image (e.g. the form was submitted with the image slot empty).
    if image is None:
        raise ValueError("No image provided: please upload a medical image to describe.")

    # Define a chat template for the model to respond to: a system persona
    # followed by a user turn holding one image placeholder and the question.
    messages = [
        {
            "role": "system",
            "content": [
                {"type": "text", "text": "You are a valuable medical doctor and you are looking at an image of your patient."},
            ],
        },
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "What do we see in this image?"},
            ],
        },
    ]
    # Apply the chat template and add a generation prompt
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    # Preprocess the input image and text into PyTorch tensors
    inputs = processor(text=prompt, images=[image], return_tensors="pt")
    # Move every input tensor to the device the model was placed on
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # Generate a description with the model, capped at 100 new tokens
    generated_ids = model.generate(**inputs, max_new_tokens=100)
    # Decode the generated IDs into text, dropping special tokens
    generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return generated_texts
# Texts shown by the Gradio page: heading, short description and device notice
title = (
    "<a href='https://huggingface.co/eltorio/IDEFICS3_ROCO'>IDEFICS3_ROCO</a>"
    f": Medical Image to Text <b>running on {device}</b>"
)
desc = "This model generates a description of a medical image."

# The notice depends on whether the selected device is the CUDA device
if device == torch.device("cuda"):
    device_desc = f"This model is running on {device} 🚀."
else:
    device_desc = (
        f"🐢 This model is running on {device} it will be very (very) slow. "
        "If you can donate some GPU time it will be usable 🐢. "
        "<a href='https://huggingface.co/eltorio/IDEFICS3_ROCO/discussions'>Please contact us.</a>"
    )

# Long description placed in the article slot of the interface; it embeds the
# device notice built above
long_desc = (
    "This demo is based on the "
    "<a href='https://huggingface.co/eltorio/IDEFICS3_ROCO'>IDEFICS3_ROCO model</a>, "
    "which is a multimodal model that can generate text from images. "
    "It has been fine-tuned on "
    "<a href='https://huggingface.co/datasets/eltorio/ROCO-radiology'>eltorio/ROCO-radiology</a> "
    "a dataset of medical images and can generate descriptions of medical images. "
    "Try uploading an image of a medical image and see what the model generates!"
    f"<br><b>{device_desc}</b><br> 2024 - Ronan Le Meillat"
)

# Build the Gradio interface around infere: one image input, one text output
radiotest = gr.Interface(
    fn=infere,
    inputs="image",
    outputs="text",
    title=title,
    description=desc,
    article=long_desc,
)

# Launch the Gradio interface and share it
radiotest.launch(share=True)