import gradio as gr
from huggingface_hub import InferenceClient
import os

ACCESS_TOKEN = os.getenv("myHFtoken")
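# Fail fast if the token is missing; without it the Inference API calls below
# will be rejected. (Assumes the hosting environment, e.g. a Space secret,
# defines "myHFtoken".)
if not ACCESS_TOKEN:
    raise RuntimeError("Environment variable 'myHFtoken' is not set.")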

print("Access token loaded.")

# Note: "api_key" is accepted as an alias for "token" in recent
# huggingface_hub releases.
client = InferenceClient(api_key=ACCESS_TOKEN)

print("Client initialized.")

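# Preset system prompts keyed by the dropdown label shown in the UI. A
# non-empty custom prompt entered in the Configuration panel overrides the
# selected preset (see respond()).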
SYSTEM_PROMPTS = {
    "zh-HK": "必須用繁體字,香港廣東話語調對話. No chatty. Answer in simple but accurate way.",
    "zh-TW": "Chat by Traditional Chinese language of Taiwan (zh-TW). No chatty. Answer in simple but accurate way.",
    "EN: General Assistant": "You are a helpful, respectful and honest assistant. Always provide accurate information and admit when you're not sure about something.",
    "EN: Code Helper": "You are a programming assistant. Help users with coding questions, debugging, and best practices. Provide clear explanations and code examples when appropriate.",
    "EN: Creative Writer": "You are a creative writing assistant. Help users with storytelling, character development, and creative writing techniques. Be imaginative and encouraging."
}

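# Rebuild the message list from the conversation history, call the Inference
# API with streaming enabled, and yield the accumulated reply after each token
# so Gradio can re-render the Chatbot incrementally.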
def respond(
    message,
    history: list[list[str]],  # completed [user, assistant] pairs from the Chatbot
    preset_prompt,
    custom_prompt,
    max_tokens,
    temperature,
    top_p,
    model_name,
):
    print(f"Received message: {message}")
    print(f"History: {history}")
    
    system_message = custom_prompt if custom_prompt.strip() else SYSTEM_PROMPTS[preset_prompt]
    
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Selected model: {model_name}")

    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
            print(f"Added user message to context: {val[0]}")
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
            print(f"Added assistant message to context: {val[1]}")

    messages.append({"role": "user", "content": message})

    response = ""
    print("Sending request to Hugging Face API.")
    
    stream = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True
    )

    for chunk in stream:
        try:
            # Handle Hugging Face's streaming format
            token = chunk.choices[0].delta.content
            
            if token:  # Skip empty tokens
                response += token
                yield response
                print(f"Streamed token: {token}")
        except (AttributeError, IndexError) as e:
            # Some chunks arrive without choices or delta content; skip them.
            print(f"Error processing chunk: {e}")
            continue

    print("Completed response generation.")

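# Models offered in the dropdown. These are assumed to be reachable through
# the Hugging Face Inference API with the configured token; swap in any other
# chat-capable model ID as needed.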
models = [
    #"microsoft/Phi-4-mini-instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
    "PowerInfer/SmallThinker-3B-Preview",
    "Qwen/QwQ-32B-Preview",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "microsoft/Phi-3-mini-128k-instruct",
]

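# Build the Gradio UI: a model selector, the chat area, and a collapsible
# Configuration panel for prompt selection and sampling parameters.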
with gr.Blocks() as demo:
    gr.Markdown("# LLM Test")
    
    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=models, 
            value=models[0], 
            label="Select Model:"
        )

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(
        show_label=False,
        placeholder="Enter text and press enter",
        container=False
    )
    clear = gr.Button("Clear")

    with gr.Accordion("Configuration", open=False):
        preset_prompt = gr.Dropdown(
            choices=list(SYSTEM_PROMPTS.keys()),
            value=list(SYSTEM_PROMPTS.keys())[0],
            label="Select System Prompt:"
        )
        custom_prompt = gr.Textbox(
            value="",
            label="Custom System Prompt (leaves blank to use preset):",
            lines=2
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=8192,
            value=2048,
            step=1,
            label="Max new tokens:"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.3,
            step=0.1,
            label="Temperature:"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P:"
        )

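    # Append the new user turn (assistant reply pending) and clear the textbox.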
    def user(user_message, history):
        return "", history + [[user_message, None]]

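    # Stream the assistant reply into the last history entry by delegating to
    # respond(); each yield re-renders the Chatbot with the partial text.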
    def bot(
        history,
        preset_prompt,
        custom_prompt,
        max_tokens,
        temperature,
        top_p,
        model_name
    ):
        history[-1][1] = ""
        for character in respond(
            history[-1][0],
            history[:-1],
            preset_prompt,
            custom_prompt,
            max_tokens,
            temperature,
            top_p,
            model_name
        ):
            history[-1][1] = character
            yield history

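    # Two-step event chain: user() echoes the message immediately (unqueued),
    # then bot() streams the model's reply into the same history entry.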
    msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, preset_prompt, custom_prompt, max_tokens, temperature, top_p, model_dropdown],
        chatbot
    )

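    # Reset the conversation without going through the request queue.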
    clear.click(lambda: None, None, chatbot, queue=False)

print("Gradio interface initialized.")

if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()