taarhissian committed
Commit 6cd7aaf · verified · 1 Parent(s): 1b9e4e6

Update code.txt

Files changed (1)
  1. code.txt +23 -36
code.txt CHANGED
@@ -1,24 +1,17 @@
  !pip install -U adapter-transformers
  !pip install -U transformers
- !pip install torch torchvision torchaudio
- !pip install opencv-python
  import gradio as gr
  from transformers import CLIPProcessor, CLIPModel
  from PIL import Image
  import torch
- import cv2

- # Load the CLIP model and processor
+ # Load the model and processor
  model = CLIPModel.from_pretrained("Taarhoinc/TaarhoGen1")
  processor = CLIPProcessor.from_pretrained("Taarhoinc/TaarhoGen1")

- # Load the object detection model (YOLOv5 example)
- model_path = 'yolov5s.pt'  # Replace with the path to your YOLOv5 model
- object_detection_model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_path)
-
- # Define the function to describe a floor plan with sizes
- def describe_floorplan_with_sizes(floorplan_image: Image.Image, top_k: int = 3):
-     """Describes a floor plan drawing by listing components and their sizes."""
+ # Define the function to describe a floor plan
+ def describe_floorplan(floorplan_image: Image.Image, top_k: int = 3):
+     """Describes a floor plan drawing by listing components."""

      # Define a list of common floor plan components
      components = [
@@ -35,41 +28,35 @@ def describe_floorplan_with_sizes(floorplan_image: Image.Image, top_k: int = 3):
          "window",
      ]

-     # Perform object detection
-     results = object_detection_model(floorplan_image)
-
-     # Get detected objects and bounding boxes
-     detections = results.pandas().xyxy[0]
-
-     # Filter detections based on confidence and class names
-     threshold = 0.5  # Adjust as needed
-     filtered_detections = detections[
-         (detections['confidence'] > threshold)
-         & (detections['name'].isin(components))
-     ]
-
-     # Estimate sizes (assuming a scale of 1 pixel = 0.1 feet)
-     scale = 0.1  # Adjust according to the actual scale of the floor plan
-     component_sizes = []
-     for index, row in filtered_detections.iterrows():
-         width = (row['xmax'] - row['xmin']) * scale
-         height = (row['ymax'] - row['ymin']) * scale
-         component_sizes.append(f"{row['name']}: {width:.2f}ft x {height:.2f}ft")
-
-     # Combine with CLIP-based description
-     clip_description = describe_floorplan(floorplan_image, top_k)
-     final_description = clip_description + ", " + ", ".join(component_sizes)
-
-     return final_description
+     # Preprocess the image and text prompts
+     inputs = processor(
+         text=components, images=floorplan_image, return_tensors="pt", padding=True
+     )
+
+     # Get the logits (similarity scores)
+     with torch.no_grad():
+         outputs = model(**inputs)
+         logits_per_image = outputs.logits_per_image
+
+     # Get the predicted probabilities
+     probs = logits_per_image.softmax(dim=1).cpu().numpy()[0]
+
+     # Get the indices of the top-k components
+     top_k_indices = probs.argsort()[-top_k:][::-1]
+
+     # Get the top-k components
+     detected_components = [components[i] for i in top_k_indices]
+
+     return ", ".join(detected_components)  # Return as a comma-separated string

  # Create the Gradio interface
  gr.Interface(
-     fn=describe_floorplan_with_sizes,
+     fn=describe_floorplan,
      inputs=[
          gr.Image(label="Upload a floor plan drawing", type="pil"),
          gr.Slider(1, 10, step=1, value=3, label="Number of components to detect"),
      ],
-     outputs=gr.Label(label="Detected Components with Sizes"),
-     title="Floor Plan Description with TaarhoGen1 and Sizes",
-     description="Upload a floor plan drawing to get a list of detected components and their sizes.",
+     outputs=gr.Label(label="Detected Components"),
+     title="Floor Plan Description with TaarhoGen1",
+     description="Upload a floor plan drawing to get a list of detected components.",
  ).launch()
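
For a quick check of the new zero-shot step outside Gradio, the same calls can be run directly. The sketch below is illustrative, not part of the commit: it reuses the Taarhoinc/TaarhoGen1 checkpoint from the script (any CLIP checkpoint, e.g. openai/clip-vit-base-patch32, follows the same pattern), and the shortened label list and floorplan.png path are placeholders.

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

# Same checkpoint as the script above; any CLIP checkpoint works the same way.
model = CLIPModel.from_pretrained("Taarhoinc/TaarhoGen1")
processor = CLIPProcessor.from_pretrained("Taarhoinc/TaarhoGen1")

components = ["bedroom", "bathroom", "kitchen", "door", "window"]  # placeholder subset
image = Image.open("floorplan.png")  # placeholder path

# Score every label against the image in one forward pass.
inputs = processor(text=components, images=image, return_tensors="pt", padding=True)
with torch.no_grad():
    probs = model(**inputs).logits_per_image.softmax(dim=1)[0]

# Print the labels from most to least likely.
for i in probs.argsort(descending=True):
    print(f"{components[i]}: {probs[i].item():.3f}")

Note that CLIP compares the whole image against each label, so this ranks which components are likely present; unlike the removed YOLOv5 path, it does not localize them or estimate sizes.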