import os

import gradio as gr
from huggingface_hub import InferenceClient
ACCESS_TOKEN = os.getenv("myHFtoken")
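# Fail fast if the token is missing (a minimal guard; assumes the Space secret
# is named "myHFtoken", as read above).
if not ACCESS_TOKEN:
    raise RuntimeError("Set the 'myHFtoken' secret to a Hugging Face access token.")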
print("Access token loaded.")
client = InferenceClient(api_key=ACCESS_TOKEN)
print("Client initialized.")
SYSTEM_PROMPTS = {
    # "Must use Traditional Chinese characters and a Hong Kong Cantonese tone."
    "zh-HK": "必須用繁體字,香港廣東話語調對話. Do not be chatty. Answer simply but accurately.",
    "zh-TW": "Chat in the Traditional Chinese used in Taiwan (zh-TW). Do not be chatty. Answer simply but accurately.",
    "EN: General Assistant": "You are a helpful, respectful and honest assistant. Always provide accurate information and admit when you're not sure about something.",
    "EN: Code Helper": "You are a programming assistant. Help users with coding questions, debugging, and best practices. Provide clear explanations and code examples when appropriate.",
    "EN: Creative Writer": "You are a creative writing assistant. Help users with storytelling, character development, and creative writing techniques. Be imaginative and encouraging.",
}
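# New personas are plain key/value pairs; the key becomes the dropdown label
# below. A hypothetical example entry:
#   "EN: Translator": "You translate user text into English, preserving tone.",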
def respond(
    message,
    history: list[list[str | None]],
    preset_prompt,
    custom_prompt,
    max_tokens,
    temperature,
    top_p,
    model_name,
):
    print(f"Received message: {message}")
    print(f"History: {history}")
    # A non-empty custom prompt takes precedence over the selected preset.
    system_message = custom_prompt if custom_prompt.strip() else SYSTEM_PROMPTS[preset_prompt]
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Selected model: {model_name}")
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
            print(f"Added user message to context: {user_turn}")
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
            print(f"Added assistant message to context: {assistant_turn}")
    messages.append({"role": "user", "content": message})
    response = ""
    print("Sending request to Hugging Face API.")
    stream = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )
    for chunk in stream:
        try:
            # Each streamed chunk carries an incremental delta of the reply.
            token = chunk.choices[0].delta.content
            if token:  # skip empty deltas
                response += token
                yield response
                print(f"Streamed token: {token}")
        except (AttributeError, IndexError) as e:
            # Tolerate chunks that don't match the expected streaming format.
            print(f"Error processing chunk: {e}")
            continue
    print("Completed response generation.")
models = [
    # "microsoft/Phi-4-mini-instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
    "PowerInfer/SmallThinker-3B-Preview",
    "Qwen/QwQ-32B-Preview",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "microsoft/Phi-3-mini-128k-instruct",
]
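# Which of these are actually deployed on the serverless Inference API changes
# over time; any chat-completion-capable model ID can be substituted.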
with gr.Blocks() as demo:
    gr.Markdown("# LLM Test")
    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=models,
            value=models[0],
            label="Select Model:",
        )
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(
        show_label=False,
        placeholder="Enter text and press enter",
        container=False,
    )
    clear = gr.Button("Clear")
    with gr.Accordion("Configuration", open=False):
        preset_prompt = gr.Dropdown(
            choices=list(SYSTEM_PROMPTS.keys()),
            value=list(SYSTEM_PROMPTS.keys())[0],
            label="Select System Prompt:",
        )
        custom_prompt = gr.Textbox(
            value="",
            label="Custom System Prompt (leave blank to use the preset):",
            lines=2,
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=8192,
            value=2048,
            step=1,
            label="Max new tokens:",
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.3,
            step=0.1,
            label="Temperature:",
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P:",
        )
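    # Temperature rescales the token distribution and Top-P truncates its tail;
    # the conservative defaults (0.3, 0.95) favor focused, repeatable answers.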
    def user(user_message, history):
        # Append the new user turn (reply still pending) and clear the textbox.
        return "", history + [[user_message, None]]

    def bot(
        history,
        preset_prompt,
        custom_prompt,
        max_tokens,
        temperature,
        top_p,
        model_name,
    ):
        history[-1][1] = ""
        # `respond` yields the accumulated reply so far, not single characters.
        for partial_response in respond(
            history[-1][0],
            history[:-1],
            preset_prompt,
            custom_prompt,
            max_tokens,
            temperature,
            top_p,
            model_name,
        ):
            history[-1][1] = partial_response
            yield history
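    # Event chain: `user` runs unqueued so the turn appears and the textbox
    # clears immediately; `bot` then streams the reply by re-yielding history.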
    msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot,
        [chatbot, preset_prompt, custom_prompt, max_tokens, temperature, top_p, model_dropdown],
        chatbot,
    )
    clear.click(lambda: None, None, chatbot, queue=False)
print("Gradio interface initialized.")
if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()