Spaces:

derektan95
/

search-tta-demo

Running on Zero

App Files Community

derektan committed on Jul 25

Commit

40d2b47

1 Parent(s): 68a0d40

[UPDATE] Moved Search-TTA (ClipSegTTA) init into the planner threads to solve the ZeroGPU issue

Browse files

Files changed (2) hide show

app.py +64 -47
app_multimodal_inference.py +1 -0

app.py CHANGED Viewed

@@ -84,49 +84,47 @@ policy_net.load_state_dict(checkpoint['policy_model'])
 print('Model loaded!')
 # print(next(policy_net.parameters()).device)
-# Init Taxabind here (only need to init once)
-if TAXABIND_TTA:
-    # Instantiate TWO independent ClipSegTTA objects (one per concurrent run)
-    clip_seg_tta_1 = ClipSegTTA(
-        img_dir=TAXABIND_IMG_DIR,
-        imo_dir=TAXABIND_IMO_DIR,
-        json_path=TAXABIND_INAT_JSON_PATH,
-        sat_to_img_ids_json_path=TAXABIND_SAT_TO_IMG_IDS_JSON_PATH,
-        patch_size=TAXABIND_PATCH_SIZE,
-        sat_checkpoint_path=TAXABIND_SAT_CHECKPOINT_PATH,
-        sample_index = -1,   # Set using 'reset' in worker
-        blur_kernel = TAXABIND_GAUSSIAN_BLUR_KERNEL,
-        device=device, # device,
-        sat_to_img_ids_json_is_train_dict=False, # for search ds val
-        tax_to_filter_val=QUERY_TAX,
-        load_model=USE_CLIP_PREDS,
-        initial_modality=INITIAL_MODALITY,
-        sound_data_path = TAXABIND_SOUND_DATA_PATH,
-        sound_checkpoint_path=TAXABIND_SOUND_CHECKPOINT_PATH,
-        # sat_filtered_json_path=TAXABIND_FILTERED_INAT_JSON_PATH,
-    )
-    clip_seg_tta_2 = ClipSegTTA(
-        img_dir=TAXABIND_IMG_DIR,
-        imo_dir=TAXABIND_IMO_DIR,
-        json_path=TAXABIND_INAT_JSON_PATH,
-        sat_to_img_ids_json_path=TAXABIND_SAT_TO_IMG_IDS_JSON_PATH,
-        patch_size=TAXABIND_PATCH_SIZE,
-        sat_checkpoint_path=TAXABIND_SAT_CHECKPOINT_PATH,
-        sample_index = -1,   # Set using 'reset' in worker
-        blur_kernel = TAXABIND_GAUSSIAN_BLUR_KERNEL,
-        device=device,
-        sat_to_img_ids_json_is_train_dict=False,
-        tax_to_filter_val=QUERY_TAX,
-        load_model=USE_CLIP_PREDS,
-        initial_modality=INITIAL_MODALITY,
-        sound_data_path=TAXABIND_SOUND_DATA_PATH,
-        sound_checkpoint_path=TAXABIND_SOUND_CHECKPOINT_PATH,
-    )
-    print("ClipSegTTA instances loaded!")
-    # Keep original name for single-run mode compatibility
-    clip_seg_tta = clip_seg_tta_1
-else:
-    clip_seg_tta_1 = clip_seg_tta_2 = clip_seg_tta = None
 # Load metadata json
 tgts_metadata_json_path = os.path.join(script_dir, "examples/metadata.json")
@@ -179,7 +177,26 @@ def process_search_tta(
     # Helper to build a TestWorker with/without TTA
     def build_planner(enable_tta: bool, save_dir: str, clip_obj):
-        local_clip = clip_obj  # re-use the pre-instantiated ClipSegTTA
         if local_clip is not None:
             # Feed inputs to ClipSegTTA copy
             local_clip.img_paths = [ground_path] if ground_path else []
@@ -235,12 +252,12 @@ def process_search_tta(
     # Launch both planners in background threads – preparation included
     thread_tta = threading.Thread(
         target=_planner_thread,
-        args=(True, gifs_dir_tta, clip_seg_tta_1, "tta"),
         daemon=True,
     )
     thread_no = threading.Thread(
         target=_planner_thread,
-        args=(False, gifs_dir_no, clip_seg_tta_2, "no"),
         daemon=True,
     )
     _register_thread(thread_tta)
@@ -383,7 +400,7 @@ with gr.Blocks(title="Search-TTA (Simplified)", theme=gr.themes.Base()) as demo:
         """
         # Search-TTA: A Multimodal Test-Time Adaptation Framework for Visual Search in the Wild Demo
         Click on any of the <b>examples below</b> and run the <b>TTA demo</b>. Check out the <b>multimodal heatmap generation feature</b> by switching to the next tab above. <br>
-        Note that the model initialization, RL planner, and TTA updates are not fully optimized on GPU for this huggingface demo, and hence may experience some lag during execution. If you encounter an 'Error' status, refresh the browser and rerun the demo. We will improve this in the future.  <br>
         <a href="https://search-tta.github.io">Project Website</a>
         """
     )

 print('Model loaded!')
 # print(next(policy_net.parameters()).device)
+# # (ClipSegTTA will now be instantiated lazily inside each planner thread)
+# clip_seg_tta_1 = clip_seg_tta_2 = None  # placeholder; real instances created per thread
+# if False and TAXABIND_TTA:
+#     # Instantiate TWO independent ClipSegTTA objects (one per concurrent run)
+#     clip_seg_tta_1 = ClipSegTTA(
+#         img_dir=TAXABIND_IMG_DIR,
+#         imo_dir=TAXABIND_IMO_DIR,
+#         json_path=TAXABIND_INAT_JSON_PATH,
+#         sat_to_img_ids_json_path=TAXABIND_SAT_TO_IMG_IDS_JSON_PATH,
+#         patch_size=TAXABIND_PATCH_SIZE,
+#         sat_checkpoint_path=TAXABIND_SAT_CHECKPOINT_PATH,
+#         sample_index = -1,   # Set using 'reset' in worker
+#         blur_kernel = TAXABIND_GAUSSIAN_BLUR_KERNEL,
+#         device=device, # device,
+#         sat_to_img_ids_json_is_train_dict=False, # for search ds val
+#         tax_to_filter_val=QUERY_TAX,
+#         load_model=USE_CLIP_PREDS,
+#         initial_modality=INITIAL_MODALITY,
+#         sound_data_path = TAXABIND_SOUND_DATA_PATH,
+#         sound_checkpoint_path=TAXABIND_SOUND_CHECKPOINT_PATH,
+#         # sat_filtered_json_path=TAXABIND_FILTERED_INAT_JSON_PATH,
+#     )
+#     clip_seg_tta_2 = ClipSegTTA(
+#         img_dir=TAXABIND_IMG_DIR,
+#         imo_dir=TAXABIND_IMO_DIR,
+#         json_path=TAXABIND_INAT_JSON_PATH,
+#         sat_to_img_ids_json_path=TAXABIND_SAT_TO_IMG_IDS_JSON_PATH,
+#         patch_size=TAXABIND_PATCH_SIZE,
+#         sat_checkpoint_path=TAXABIND_SAT_CHECKPOINT_PATH,
+#         sample_index = -1,   # Set using 'reset' in worker
+#         blur_kernel = TAXABIND_GAUSSIAN_BLUR_KERNEL,
+#         device=device,
+#         sat_to_img_ids_json_is_train_dict=False,
+#         tax_to_filter_val=QUERY_TAX,
+#         load_model=USE_CLIP_PREDS,
+#         initial_modality=INITIAL_MODALITY,
+#         sound_data_path=TAXABIND_SOUND_DATA_PATH,
+#         sound_checkpoint_path=TAXABIND_SOUND_CHECKPOINT_PATH,
+#     )
 # Load metadata json
 tgts_metadata_json_path = os.path.join(script_dir, "examples/metadata.json")
     # Helper to build a TestWorker with/without TTA
     def build_planner(enable_tta: bool, save_dir: str, clip_obj):
+        # Lazily (re)create a ClipSegTTA instance per thread if not provided
+        local_clip = clip_obj
+        if TAXABIND_TTA and local_clip is None:
+            local_clip = ClipSegTTA(
+                img_dir=TAXABIND_IMG_DIR,
+                imo_dir=TAXABIND_IMO_DIR,
+                json_path=TAXABIND_INAT_JSON_PATH,
+                sat_to_img_ids_json_path=TAXABIND_SAT_TO_IMG_IDS_JSON_PATH,
+                patch_size=TAXABIND_PATCH_SIZE,
+                sat_checkpoint_path=TAXABIND_SAT_CHECKPOINT_PATH,
+                sample_index=-1,
+                blur_kernel=TAXABIND_GAUSSIAN_BLUR_KERNEL,
+                device=device,
+                sat_to_img_ids_json_is_train_dict=False,
+                tax_to_filter_val=QUERY_TAX,
+                load_model=USE_CLIP_PREDS,
+                initial_modality=INITIAL_MODALITY,
+                sound_data_path=TAXABIND_SOUND_DATA_PATH,
+                sound_checkpoint_path=TAXABIND_SOUND_CHECKPOINT_PATH,
+            )
         if local_clip is not None:
             # Feed inputs to ClipSegTTA copy
             local_clip.img_paths = [ground_path] if ground_path else []
     # Launch both planners in background threads – preparation included
     thread_tta = threading.Thread(
         target=_planner_thread,
+        args=(True, gifs_dir_tta, None, "tta"),
         daemon=True,
     )
     thread_no = threading.Thread(
         target=_planner_thread,
+        args=(False, gifs_dir_no, None, "no"),
         daemon=True,
     )
     _register_thread(thread_tta)
         """
         # Search-TTA: A Multimodal Test-Time Adaptation Framework for Visual Search in the Wild Demo
         Click on any of the <b>examples below</b> and run the <b>TTA demo</b>. Check out the <b>multimodal heatmap generation feature</b> by switching to the next tab above. <br>
+        Note that the model initialization, RL planner, and TTA updates are not fully optimized on GPU for this huggingface demo, and hence may experience some lag during execution. If you encounter an 'Error' status, refresh the browser and rerun the demo, or try again the next day. We will improve this in the future.  <br>
         <a href="https://search-tta.github.io">Project Website</a>
         """
     )

app_multimodal_inference.py CHANGED Viewed

@@ -185,6 +185,7 @@ with gr.Blocks(title="Search-TTA", theme=gr.themes.Base()) as demo:
         """
         # Search-TTA: A Multimodal Test-Time Adaptation Framework for Visual Search in the Wild Demo
         Click on any of the <b>examples below</b> and run the <b>multimodal inference demo</b>. Check out the <b>test-time adaptation feature</b> by switching to the previous tab above. <br>
         <a href="https://search-tta.github.io">Project Website</a>
         """
     )

         """
         # Search-TTA: A Multimodal Test-Time Adaptation Framework for Visual Search in the Wild Demo
         Click on any of the <b>examples below</b> and run the <b>multimodal inference demo</b>. Check out the <b>test-time adaptation feature</b> by switching to the previous tab above. <br>
+        If you encounter any errors, refresh the browser and rerun the demo, or try again the next day. We will improve this in the future. <br>
         <a href="https://search-tta.github.io">Project Website</a>
         """
     )