|
!pip install -U adapter-transformers |
|
!pip install -U transformers |
|
import gradio as gr |
|
from transformers import CLIPProcessor, CLIPModel |
|
from PIL import Image |
|
import torch |
|
|
|
# The CLIP weights and the matching preprocessor come from the same
# checkpoint, so the identifier is spelled out once and reused.
MODEL_ID = "Taarhoinc/TaarhoGen1"

model = CLIPModel.from_pretrained(MODEL_ID)
processor = CLIPProcessor.from_pretrained(MODEL_ID)
|
|
|
# Define the function to describe a floor plan |
|
def describe_floorplan(floorplan_image: "Image.Image", top_k: int = 3):
    """Describe a floor plan drawing by naming its most likely components.

    The image is scored against a fixed vocabulary of floor plan component
    names using the module-level CLIP ``model`` and ``processor``. The
    ``top_k`` best-matching names are returned, best match first.

    Args:
        floorplan_image: The floor plan as a PIL image. ``None`` (for
            example, a form submitted without an upload) yields ``""``.
        top_k: How many component names to return. Any integral number is
            accepted (``3`` or ``3.0``); the value is clamped to the range
            ``0 .. len(vocabulary)``.

    Returns:
        A comma-separated string of component names, or ``""`` when there is
        no image or ``top_k`` is not positive.
    """
    # Vocabulary of text prompts the image is compared against.
    components = [
        "bedroom",
        "kitchen",
        "bathroom",
        "living room",
        "dining room",
        "hallway",
        "garage",
        "balcony",
        "stairs",
        "door",
        "window",
    ]

    # Normalise the requested count before using it. A UI widget may hand the
    # value over as a float, which cannot be used as a slice bound or as `k`.
    # A zero or negative count must mean "nothing" rather than being
    # interpreted as a slice offset (e.g. `[-0:]` selects every element).
    k = max(0, min(int(top_k), len(components)))

    # Nothing to score, or nothing requested: skip the model entirely.
    if floorplan_image is None or k == 0:
        return ""

    # Tokenise the prompts and preprocess the image into model tensors.
    inputs = processor(
        text=components,
        images=floorplan_image,
        return_tensors="pt",
        padding=True,
    )

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = model(**inputs)

    # One image was passed in, so row 0 holds its score for every prompt.
    probs = outputs.logits_per_image.softmax(dim=1)[0]

    # torch.topk returns the k largest entries already sorted best-first.
    best_indices = torch.topk(probs, k).indices.tolist()

    return ", ".join(components[i] for i in best_indices)
|
|
|
# Assemble the web UI: an image upload and a slider for the number of
# components to report feed describe_floorplan, whose result is shown in a
# label widget.
floorplan_input = gr.Image(label="Upload a floor plan drawing", type="pil")
component_count = gr.Slider(
    1, 10, step=1, value=3, label="Number of components to detect"
)
detected_output = gr.Label(label="Detected Components")

demo = gr.Interface(
    fn=describe_floorplan,
    inputs=[floorplan_input, component_count],
    outputs=detected_output,
    title="Floor Plan Description with TaarhoGen1",
    description="Upload a floor plan drawing to get a list of detected components.",
)

# Start serving the interface.
demo.launch()