derekalia committed on
Commit
46b30d6
·
1 Parent(s): 5f56249
Files changed (1) hide show
  1. app.py +19 -2
app.py CHANGED
@@ -78,9 +78,11 @@ def process(
78
  # import pdb; pdb.set_trace()
79
 
80
  ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=True)
 
 
81
  text, ocr_bbox = ocr_bbox_rslt
82
  # print('prompt:', prompt)
83
- dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD = box_threshold, output_coord_in_ratio=True, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold)
84
  image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
85
  print('finish processing')
86
  parsed_content_list = '\n'.join(parsed_content_list)
@@ -93,7 +95,22 @@ def process(
93
  # coords_formatted = [f"{coord:.1f}" for coord in coords_list]
94
  # coordinates_text += f"Box {box_id}: [{coords_formatted[0]}, {coords_formatted[1]}, {coords_formatted[2]}, {coords_formatted[3]}]\n"
95
 
96
- return image, str(parsed_content_list), str(label_coordinates)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
 
99
  with gr.Blocks() as demo:
 
78
  # import pdb; pdb.set_trace()
79
 
80
  ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=True)
81
+
82
+
83
  text, ocr_bbox = ocr_bbox_rslt
84
  # print('prompt:', prompt)
85
+ dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD = box_threshold, output_coord_in_ratio=False, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold)
86
  image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
87
  print('finish processing')
88
  parsed_content_list = '\n'.join(parsed_content_list)
 
95
  # coords_formatted = [f"{coord:.1f}" for coord in coords_list]
96
  # coordinates_text += f"Box {box_id}: [{coords_formatted[0]}, {coords_formatted[1]}, {coords_formatted[2]}, {coords_formatted[3]}]\n"
97
 
98
+ combined_content = []
99
+ for i, content in enumerate(parsed_content_list):
100
+ if content.startswith('Text Box ID'):
101
+ box_id = str(i)
102
+ else:
103
+ # Extract the ID number from Icon Box ID format
104
+ box_id = content.split('Icon Box ID ')[1].split(':')[0]
105
+
106
+ coords = label_coordinates.get(box_id)
107
+ if coords is not None: # Changed from 'if coords:' to handle numpy arrays
108
+ coords_str = [round(x) for x in coords] # Convert numpy values to rounded integers
109
+ combined_content.append(f"{content} | Coordinates: {coords_str}")
110
+ else:
111
+ combined_content.append(content)
112
+
113
+ return image, str(parsed_content_list), str(combined_content)
114
 
115
 
116
  with gr.Blocks() as demo: