plcedoz38 committed
Commit e70b669 · 1 Parent(s): 6cb6b84
Files changed (1)
  1. app.py +3 -6
app.py CHANGED
@@ -4,13 +4,14 @@ subprocess.run(
     "pip install flash-attn --no-build-isolation", env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, shell=True
 )
 
-from typing import Any, List
+from typing import Any, List, Literal
 
 import gradio as gr
 import requests
 import spaces
 import torch
 from PIL import Image, ImageDraw
+from pydantic import BaseModel, Field
 from transformers import AutoModelForImageTextToText, AutoProcessor
 from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
 
@@ -42,7 +43,7 @@ except Exception as e:
 # --- Helper functions from the model card (or adapted) ---
 
 
-# @spaces.GPU(duration=120)
+@spaces.GPU(duration=20)
 def run_inference_localization(
     messages_for_template: List[dict[str, Any]], pil_image_for_processing: Image.Image
 ) -> str:
@@ -82,10 +83,6 @@ def run_inference_localization(
     return decoded_output[0] if decoded_output else ""
 
 
-from typing import Literal
-
-from pydantic import BaseModel, Field
-
 SYSTEM_PROMPT: str = """Imagine you are a robot browsing the web, just like humans. Now you need to complete a task.
 In each iteration, you will receive an Observation that includes the last screenshots of a web browser and the current memory of the agent.
 You have also information about the step that the agent is trying to achieve to solve the task.
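In summary, the commit moves the `Literal` and `pydantic` imports to the top of the file (per PEP 8) and, more importantly, enables the previously commented-out `spaces.GPU` decorator on `run_inference_localization`, lowering the requested GPU duration from 120 to 20 seconds. On a ZeroGPU Space, a GPU is attached only while a `@spaces.GPU`-decorated function executes, so a shorter `duration` holds the shared hardware for less time per call. A minimal sketch of that pattern is below; the model ID, generation settings, and helper names are illustrative placeholders, not the actual code of this app.

```python
# Sketch of the ZeroGPU pattern this commit enables: the GPU is allocated only
# while the decorated function runs, for at most `duration` seconds.
# MODEL_ID and generation settings are placeholder assumptions.
import spaces
import torch
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor

MODEL_ID = "some-org/some-image-text-model"  # placeholder, not this app's model
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)


@spaces.GPU(duration=20)  # GPU attached only for this call, then released
def run_inference(messages: list[dict], image: Image.Image) -> str:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    # Build the chat prompt and pack text + image into model inputs.
    prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(device)
    with torch.no_grad():
        generated = model.generate(**inputs, max_new_tokens=128)
    # Keep only the newly generated tokens, dropping the echoed prompt.
    new_tokens = generated[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(new_tokens, skip_special_tokens=True)[0]
```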