	fix
app.py CHANGED

@@ -4,13 +4,14 @@ subprocess.run(
     "pip install flash-attn --no-build-isolation", env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, shell=True
 )
 
-from typing import Any, List
+from typing import Any, List, Literal
 
 import gradio as gr
 import requests
 import spaces
 import torch
 from PIL import Image, ImageDraw
+from pydantic import BaseModel, Field
 from transformers import AutoModelForImageTextToText, AutoProcessor
 from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
 
@@ -42,7 +43,7 @@ except Exception as e:
 # --- Helper functions from the model card (or adapted) ---
 
 
-
+@spaces.GPU(duration=20)
 def run_inference_localization(
     messages_for_template: List[dict[str, Any]], pil_image_for_processing: Image.Image
 ) -> str:
@@ -82,10 +83,6 @@ def run_inference_localization(
     return decoded_output[0] if decoded_output else ""
 
 
-from typing import Literal
-
-from pydantic import BaseModel, Field
-
 SYSTEM_PROMPT: str = """Imagine you are a robot browsing the web, just like humans. Now you need to complete a task.
 In each iteration, you will receive an Observation that includes the last  screenshots of a web browser and the current memory of the agent.
 You have also information about the step that the agent is trying to achieve to solve the task.
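
In short, the fix moves the stray mid-file imports (Literal, pydantic's BaseModel/Field) to the top of app.py and decorates the inference function with @spaces.GPU, which ZeroGPU Spaces require so that a GPU is attached for the duration of the call. A minimal sketch of that decorator pattern, with placeholder names, assuming the `spaces` package that ZeroGPU hardware provides:

import spaces
import torch

@spaces.GPU(duration=20)  # request a GPU for at most ~20 s per call
def gpu_task(x: torch.Tensor) -> torch.Tensor:
    # On ZeroGPU, CUDA is only guaranteed to be usable inside a function
    # decorated with @spaces.GPU; `gpu_task` is a placeholder name.
    return (x.to("cuda") * 2).cpu()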
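
For illustration only (the diff does not show the app's actual models): Literal combined with pydantic's BaseModel and Field is typically used to give the web-browsing agent a typed action schema, which is presumably why these imports exist. ClickAction and its fields below are hypothetical names, not code from this Space:

from typing import Literal
from pydantic import BaseModel, Field

class ClickAction(BaseModel):
    # Hypothetical action model; field names are illustrative only.
    action: Literal["click"] = "click"
    x: int = Field(description="Pixel x-coordinate of the click target")
    y: int = Field(description="Pixel y-coordinate of the click target")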