File size: 7,299 Bytes
182786f
2aa928f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca7f6e5
2da0af2
ca7f6e5
2da0af2
 
2aa928f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2da0af2
2aa928f
 
2da0af2
2aa928f
 
182786f
2aa928f
 
 
 
a5607cc
2aa928f
182786f
 
 
2da0af2
182786f
2da0af2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182786f
2da0af2
 
 
 
 
182786f
 
 
2aa928f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
)
import os
from threading import Thread
import spaces
import time
import subprocess

# Best-effort install of flash-attn at startup; the return code is deliberately
# not checked, so a failed install does not stop the app from starting.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    # `env` replaces the child's whole environment, so merge with os.environ
    # to keep PATH, HOME, CUDA and proxy settings visible to pip.
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)

# Optional Hugging Face access token; None when HF_TOKEN is not set.
hf_token = os.getenv("HF_TOKEN")
token = hf_token

# Single source of truth for the checkpoint used by both model and tokenizer.
MODEL_ID = "microsoft/Phi-3-mini-128k-instruct"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=token,
    trust_remote_code=True,
)
tok = AutoTokenizer.from_pretrained(MODEL_ID, token=token)

# Token ids that stop generation. Passing `eos_token_id` to `generate`
# overrides the model's own stop list, so the end-of-turn marker has to be
# listed explicitly next to the tokenizer's EOS token.
terminators = [
    tok.eos_token_id,
]
# NOTE(review): Phi-3's chat format closes each turn with "<|end|>"; confirm
# against the model card. The guard skips it if the tokenizer doesn't know it.
_end_of_turn_id = tok.convert_tokens_to_ids("<|end|>")
if (
    isinstance(_end_of_turn_id, int)
    and _end_of_turn_id != tok.unk_token_id
    and _end_of_turn_id not in terminators
):
    terminators.append(_end_of_turn_id)

# Prefer the GPU when one is visible to torch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
else:
    device = torch.device("cpu")
    print("Using CPU")

model = model.to(device)
# Dispatch Errors


@spaces.GPU(duration=60)
def chat(
    message,
    history,
    temperature,
    do_sample,
    max_tokens,
    system_message=None,
    top_p=None,
):
    """Stream a model reply for ``message`` given the previous ``history``.

    Args:
        message: The new user message.
        history: Previous turns, either ``[user, assistant]`` pairs or
            ``{"role": ..., "content": ...}`` dicts. May be empty or None.
        temperature: Sampling temperature. ``0`` forces greedy decoding.
        do_sample: Whether to sample; when falsy, decoding is greedy.
        max_tokens: Maximum number of new tokens to generate.
        system_message: Optional system prompt placed before the history.
            Whether it is rendered depends on the tokenizer's chat template.
        top_p: Optional nucleus-sampling threshold, used only when sampling.

    Yields:
        The reply text accumulated so far, growing with each streamed chunk.
    """
    conversation = []
    if system_message:
        conversation.append({"role": "system", "content": system_message})
    conversation.extend(_history_to_messages(history))
    conversation.append({"role": "user", "content": message})

    prompt = tok.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tok([prompt], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(
        tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )

    # Honor the caller's do_sample flag; temperature 0 also means greedy.
    sampling = bool(do_sample) and temperature != 0
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        # UI sliders may deliver floats; generate expects an integer count.
        max_new_tokens=int(max_tokens),
        do_sample=sampling,
        eos_token_id=terminators,
    )
    if sampling:
        # Sampling-only knobs are omitted for greedy decoding.
        generate_kwargs["temperature"] = temperature
        if top_p is not None:
            generate_kwargs["top_p"] = top_p

    # Generation runs in a worker thread so tokens can be streamed from here.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

    # The streamer is exhausted, so generation has finished; reap the thread.
    worker.join()

    # Guarantees at least one value is emitted even if nothing was streamed.
    yield partial_text


def _history_to_messages(history):
    """Convert chat history into a list of role/content message dicts."""
    messages = []
    for item in history or []:
        if isinstance(item, dict):
            # Entry is already a role/content dict; copy the two fields.
            messages.append({"role": item["role"], "content": item["content"]})
            continue
        messages.append({"role": "user", "content": item[0]})
        if item[1] is not None:
            messages.append({"role": "assistant", "content": item[1]})
    return messages


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Adapter between the UI's input order and ``chat``.

    The chat UI supplies its extra controls in the order system message,
    max new tokens, temperature, top-p; this reorders them for ``chat`` and
    enables sampling (``chat`` still falls back to greedy at temperature 0).

    Yields:
        The same progressively growing reply text that ``chat`` yields.
    """
    yield from chat(
        message,
        history,
        temperature,
        True,
        max_tokens,
        system_message,
        top_p,
    )


# Default system prompt shown in the "System message" textbox. It instructs
# the model to answer PTZ camera commands in a fixed single-line format.
SYSTEM_PROMPT = (
    "You are an assistant for controlling PTZ cameras.\n"
    "When the user gives you a clear command, please JUST respond in the following format:\n"
    "Camera:<camera_id>. Tracking_Target:<target_name> placement:<position> speed:<speed> only_ptz_action:<only_ptz_action> tracking_action:<tracking_action>.\n"
    "If multiple cameras are specified, provide separate lines for each camera.\n"
    "Only provide commands for the cameras specified by the user.\n"
    "Do not include additional cameras that the user did not mention.\n"
    "Ensure all field names are spelled correctly.\n\n"
    "The available placements are ONLY: top_left, top_middle, top_right, center_left, center_middle, center_right, bottom_left, bottom_middle, bottom_right.\n"
    "The available speed options are ONLY: slow, medium, fast.\n"
    "The available only_ptz_actions are ONLY: turn_right, turn_left, tilt_up, tilt_down, zoom_in, zoom_out, stop.\n"
    "The available tracking_actions are ONLY: tracking.\n\n"
    "Default Values:\n"
    "- camera_id: default\n"
    "- tracking_target: default\n"
    "- placement: center_middle\n"
    "- speed: medium\n"
    "- only_ptz_action: default\n"
    "- tracking_action: default\n\n"
    "Rules for Defaults:\n"
    "1. If the camera_id is not specified, use the default value `default`.\n"
    "2. If the tracking_target is not specified, use the default value `default`.\n"
    "3. If the position information is incomplete or not specified, default the placement to `center_middle`.\n"
    "4. If only a general direction is specified, interpret it as the middle of that direction.\n"
    "   For example, 'top' is interpreted as 'top_middle' and 'left' as 'center_left'.\n"
    "5. If the speed is not specified, default to `medium`.\n"
    "6. If the only_ptz_action is not specified, default to `default`.\n"
    "7. If the tracking_action is not specified, default to `default`.\n"
    "8. Camera IDs are restricted to 1, 2, 3, and 4. If an invalid camera_id is provided, use `default`.\n"
    "9. If the user specifies 'all camera' or 'all cameras', apply the command to all cameras (1-4).\n\n"
    "**Special Action Handling**:\n"
    "- If only `camera_id` and `only_ptz_action` are specified (all other fields are `default`), execute only the specified `only_ptz_action`.\n"
    "- If only `camera_id` and `tracking_action` are specified (all other fields are `default`), execute only the specified `tracking_action`.\n"
    "- When tracking is involved, set `tracking_action` to `tracking`.\n\n"
    "Examples:\n"
    "User: Please set camera 1 to track target Alice at bottom_right with speed fast and action turn_right.\n"
    "Assistant: Camera:1. Tracking_Target:Alice placement:bottom_right speed:fast only_ptz_action:turn_right tracking_action:default.\n\n"
    "User: Please set camera 3 to track target Bob at top with speed slow.\n"
    "Assistant: Camera:3. Tracking_Target:Bob placement:top_middle speed:slow only_ptz_action:default tracking_action:tracking.\n\n"
    "User: Please set camera 2 to track target Carol.\n"
    "Assistant: Camera:2. Tracking_Target:Carol placement:center_middle speed:medium only_ptz_action:default tracking_action:tracking.\n\n"
    "User: Please track target Dave at left.\n"
    "Assistant: Camera:default. Tracking_Target:Dave placement:center_left speed:medium only_ptz_action:default tracking_action:tracking.\n\n"
    "User: Please control camera 4.\n"
    "Assistant: Camera:4. Tracking_Target:default placement:center_middle speed:medium only_ptz_action:default tracking_action:default.\n\n"
    "User: Please start recording.\n"
    "Assistant: Camera:default. Tracking_Target:default placement:center_middle speed:medium only_ptz_action:default tracking_action:default.\n\n"
    "User: Camera 5 action turn_left.\n"
    "Assistant: Camera:default. Tracking_Target:default placement:center_middle speed:medium only_ptz_action:turn_left tracking_action:default.\n\n"
    "User: camera 1 and 2 turn right.\n"
    "Assistant:\n"
    "Camera:1. Tracking_Target:default placement:center_middle speed:medium only_ptz_action:turn_right tracking_action:default.\n"
    "Camera:2. Tracking_Target:default placement:center_middle speed:medium only_ptz_action:turn_right tracking_action:default.\n\n"
    "Respond with ONLY the Assistant's output. Do NOT add any extra text."
)

# Extra controls shown with the chat box, in this order: system message,
# max new tokens, temperature, top-p. `respond` is expected to accept
# (message, history) followed by these values in the same order.
additional_controls = [
    gr.Textbox(value=SYSTEM_PROMPT, label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]

# Build the chat UI around `respond` and start serving it.
demo = gr.ChatInterface(respond, additional_inputs=additional_controls)
demo.launch()