Spaces:

Sanket17
/

NewParser

Runtime error

App Files Files Community

Sanket17 committed on Dec 31, 2024

Commit

9f9625c

verified ·

1 Parent(s): eeac153

Update main.py

Browse files

Files changed (1) hide show

main.py +57 -58

main.py CHANGED Viewed

@@ -1,77 +1,65 @@
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-from typing import Optional
 import base64
 import io
-from PIL import Image
-import torch
-import numpy as np
 import os
-# Existing imports
-import numpy as np
-import torch
 from PIL import Image
-import io
-from utils import (
-    check_ocr_box,
-    get_yolo_model,
-    get_caption_model_processor,
-    get_som_labeled_img,
-)
 import torch
-yolo_model = get_yolo_model(model_path='weights/icon_detect/best.pt')
-#caption_model_processor = get_caption_model_processor(model_name="florence2", model_name_or_path="icon_caption_florence")
 from ultralytics import YOLO
 if not os.path.exists("weights/icon_detect"):
     os.makedirs("weights/icon_detect")
 try:
     yolo_model = YOLO("weights/icon_detect/best.pt").to("cuda")
-except:
-    yolo_model = YOLO("weights/icon_detect/best.pt")
-from transformers import AutoProcessor, AutoModelForCausalLM
-processor = AutoProcessor.from_pretrained(
-    "microsoft/Florence-2-base", trust_remote_code=True
-)
 try:
     model = AutoModelForCausalLM.from_pretrained(
         "microsoft/OmniParser",
         torch_dtype=torch.float16,
-        trust_remote_code=True,
     ).to("cuda")
-except:
     model = AutoModelForCausalLM.from_pretrained(
         "microsoft/OmniParser",
         torch_dtype=torch.float16,
-        trust_remote_code=True,
     )
 caption_model_processor = {"processor": processor, "model": model}
-print("finish loading model!!!")
 app = FastAPI()
 class ProcessResponse(BaseModel):
     image: str  # Base64 encoded image
     parsed_content_list: str
     label_coordinates: str
 def process(
     image_input: Image.Image, box_threshold: float, iou_threshold: float
 ) -> ProcessResponse:
     image_save_path = "imgs/saved_image_demo.png"
     image_input.save(image_save_path)
     image = Image.open(image_save_path)
     box_overlay_ratio = image.size[0] / 3200
     draw_bbox_config = {
         "text_scale": 0.8 * box_overlay_ratio,
@@ -80,30 +68,40 @@ def process(
         "thickness": max(int(3 * box_overlay_ratio), 1),
     }
-    ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
-        image_save_path,
-        display_img=False,
-        output_bb_format="xyxy",
-        goal_filtering=None,
-        easyocr_args={"paragraph": False, "text_threshold": 0.9},
-        use_paddleocr=True,
-    )
-    text, ocr_bbox = ocr_bbox_rslt
-    dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
-        image_save_path,
-        yolo_model,
-        BOX_TRESHOLD=box_threshold,
-        output_coord_in_ratio=True,
-        ocr_bbox=ocr_bbox,
-        draw_bbox_config=draw_bbox_config,
-        caption_model_processor=caption_model_processor,
-        ocr_text=text,
-        iou_threshold=iou_threshold,
-    )
     image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
-    print("finish processing")
     parsed_content_list_str = "\n".join(parsed_content_list)
     # Encode image to base64
     buffered = io.BytesIO()
     image.save(buffered, format="PNG")
@@ -115,7 +113,7 @@ def process(
         label_coordinates=str(label_coordinates),
     )
 @app.post("/process_image", response_model=ProcessResponse)
 async def process_image(
     image_file: UploadFile = File(...),
@@ -126,7 +124,8 @@ async def process_image(
         contents = await image_file.read()
         image_input = Image.open(io.BytesIO(contents)).convert("RGB")
     except Exception as e:
-        raise HTTPException(status_code=400, detail="Invalid image file")
     response = process(image_input, box_threshold, iou_threshold)
     return response

 from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 import base64
 import io
 import os
 from PIL import Image
 import torch
+import numpy as np
 from ultralytics import YOLO
+from transformers import AutoProcessor, AutoModelForCausalLM
+# Ensure directories exist
 if not os.path.exists("weights/icon_detect"):
     os.makedirs("weights/icon_detect")
+# Model loading with error handling
 try:
+    # Load YOLO model
     yolo_model = YOLO("weights/icon_detect/best.pt").to("cuda")
+except Exception as e:
+    print(f"Error loading YOLO model: {e}")
+    yolo_model = YOLO("weights/icon_detect/best.pt")  # Load on CPU if CUDA fails
+# Load Caption Model (Florence and OmniParser)
 try:
+    processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         "microsoft/OmniParser",
         torch_dtype=torch.float16,
+        trust_remote_code=True
     ).to("cuda")
+except Exception as e:
+    print(f"Error loading caption model: {e}")
+    processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         "microsoft/OmniParser",
         torch_dtype=torch.float16,
+        trust_remote_code=True
     )
 caption_model_processor = {"processor": processor, "model": model}
+print("Finished loading models!")
+# FastAPI app initialization
 app = FastAPI()
+# Pydantic response model
 class ProcessResponse(BaseModel):
     image: str  # Base64 encoded image
     parsed_content_list: str
     label_coordinates: str
+# Function to process the image, apply YOLO, and generate captions
 def process(
     image_input: Image.Image, box_threshold: float, iou_threshold: float
 ) -> ProcessResponse:
     image_save_path = "imgs/saved_image_demo.png"
     image_input.save(image_save_path)
     image = Image.open(image_save_path)
+    # Ratio for bounding box scaling
     box_overlay_ratio = image.size[0] / 3200
     draw_bbox_config = {
         "text_scale": 0.8 * box_overlay_ratio,
         "thickness": max(int(3 * box_overlay_ratio), 1),
     }
+    # OCR Box Detection and Filtering (using EasyOCR and PaddleOCR)
+    try:
+        ocr_bbox_rslt, is_goal_filtered = check_ocr_box(
+            image_save_path,
+            display_img=False,
+            output_bb_format="xyxy",
+            goal_filtering=None,
+            easyocr_args={"paragraph": False, "text_threshold": 0.9},
+            use_paddleocr=True,
+        )
+        text, ocr_bbox = ocr_bbox_rslt
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"OCR processing failed: {e}")
+    # YOLO and Caption Model Inference
+    try:
+        dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(
+            image_save_path,
+            yolo_model,
+            BOX_TRESHOLD=box_threshold,
+            output_coord_in_ratio=True,
+            ocr_bbox=ocr_bbox,
+            draw_bbox_config=draw_bbox_config,
+            caption_model_processor=caption_model_processor,
+            ocr_text=text,
+            iou_threshold=iou_threshold,
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"YOLO or caption model inference failed: {e}")
+    # Decode the base64-encoded labeled image back into a PIL image
     image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
     parsed_content_list_str = "\n".join(parsed_content_list)
     # Encode image to base64
     buffered = io.BytesIO()
     image.save(buffered, format="PNG")
         label_coordinates=str(label_coordinates),
     )
+# FastAPI route to process uploaded image
 @app.post("/process_image", response_model=ProcessResponse)
 async def process_image(
     image_file: UploadFile = File(...),
         contents = await image_file.read()
         image_input = Image.open(io.BytesIO(contents)).convert("RGB")
     except Exception as e:
+        raise HTTPException(status_code=400, detail=f"Invalid image file: {e}")
+    # Process the image
     response = process(image_input, box_threshold, iou_threshold)
     return response