Spaces:
Runtime error
Runtime error
fix
Browse files
app.py
CHANGED
@@ -4,13 +4,14 @@ subprocess.run(
|
|
4 |
"pip install flash-attn --no-build-isolation", env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, shell=True
|
5 |
)
|
6 |
|
7 |
-
from typing import Any, List
|
8 |
|
9 |
import gradio as gr
|
10 |
import requests
|
11 |
import spaces
|
12 |
import torch
|
13 |
from PIL import Image, ImageDraw
|
|
|
14 |
from transformers import AutoModelForImageTextToText, AutoProcessor
|
15 |
from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
|
16 |
|
@@ -42,7 +43,7 @@ except Exception as e:
|
|
42 |
# --- Helper functions from the model card (or adapted) ---
|
43 |
|
44 |
|
45 |
-
|
46 |
def run_inference_localization(
|
47 |
messages_for_template: List[dict[str, Any]], pil_image_for_processing: Image.Image
|
48 |
) -> str:
|
@@ -82,10 +83,6 @@ def run_inference_localization(
|
|
82 |
return decoded_output[0] if decoded_output else ""
|
83 |
|
84 |
|
85 |
-
from typing import Literal
|
86 |
-
|
87 |
-
from pydantic import BaseModel, Field
|
88 |
-
|
89 |
SYSTEM_PROMPT: str = """Imagine you are a robot browsing the web, just like humans. Now you need to complete a task.
|
90 |
In each iteration, you will receive an Observation that includes the last screenshots of a web browser and the current memory of the agent.
|
91 |
You have also information about the step that the agent is trying to achieve to solve the task.
|
|
|
4 |
"pip install flash-attn --no-build-isolation", env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, shell=True
|
5 |
)
|
6 |
|
7 |
+
from typing import Any, List, Literal
|
8 |
|
9 |
import gradio as gr
|
10 |
import requests
|
11 |
import spaces
|
12 |
import torch
|
13 |
from PIL import Image, ImageDraw
|
14 |
+
from pydantic import BaseModel, Field
|
15 |
from transformers import AutoModelForImageTextToText, AutoProcessor
|
16 |
from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize
|
17 |
|
|
|
43 |
# --- Helper functions from the model card (or adapted) ---
|
44 |
|
45 |
|
46 |
+
@spaces.GPU(duration=20)
|
47 |
def run_inference_localization(
|
48 |
messages_for_template: List[dict[str, Any]], pil_image_for_processing: Image.Image
|
49 |
) -> str:
|
|
|
83 |
return decoded_output[0] if decoded_output else ""
|
84 |
|
85 |
|
|
|
|
|
|
|
|
|
86 |
SYSTEM_PROMPT: str = """Imagine you are a robot browsing the web, just like humans. Now you need to complete a task.
|
87 |
In each iteration, you will receive an Observation that includes the last screenshots of a web browser and the current memory of the agent.
|
88 |
You have also information about the step that the agent is trying to achieve to solve the task.
|