import gradio as gr
from huggingface_hub import InferenceClient
import os

ACCESS_TOKEN = os.getenv("myHFtoken")
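# Fail fast if the token is missing; without it the Inference API calls below
# will be rejected. (Assumes the hosting environment, e.g. a Space secret,
# defines "myHFtoken".)
if not ACCESS_TOKEN:
    raise RuntimeError("Environment variable 'myHFtoken' is not set.")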

print("Access token loaded.")

# Note: "api_key" is accepted as an alias for "token" in recent
# huggingface_hub releases.
client = InferenceClient(api_key=ACCESS_TOKEN)

print("Client initialized.")

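# Preset system prompts keyed by the dropdown label shown in the UI. A
# non-empty custom prompt entered in the Configuration panel overrides the
# selected preset (see respond()).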
SYSTEM_PROMPTS = {
    "zh-HK": "必須用繁體字,香港廣東話語調對話. No chatty. Answer in simple but accurate way.",
    "zh-TW": "Chat by Traditional Chinese language of Taiwan (zh-TW). No chatty. Answer in simple but accurate way.",
    "EN: General Assistant": "You are a helpful, respectful and honest assistant. Always provide accurate information and admit when you're not sure about something.",
    "EN: Code Helper": "You are a programming assistant. Help users with coding questions, debugging, and best practices. Provide clear explanations and code examples when appropriate.",
    "EN: Creative Writer": "You are a creative writing assistant. Help users with storytelling, character development, and creative writing techniques. Be imaginative and encouraging."
}

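# Rebuild the message list from the conversation history, call the Inference
# API with streaming enabled, and yield the accumulated reply after each token
# so Gradio can re-render the Chatbot incrementally.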
def respond(
    message,
    history: list[list[str]],  # completed [user, assistant] pairs from the Chatbot
    preset_prompt,
    custom_prompt,
    max_tokens,
    temperature,
    top_p,
    model_name,
):
    print(f"Received message: {message}")
    print(f"History: {history}")
    
    system_message = custom_prompt if custom_prompt.strip() else SYSTEM_PROMPTS[preset_prompt]
    
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Selected model: {model_name}")

    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
            print(f"Added user message to context: {val[0]}")
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
            print(f"Added assistant message to context: {val[1]}")

    messages.append({"role": "user", "content": message})

    response = ""
    print("Sending request to Hugging Face API.")
    
    stream = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True
    )

    for chunk in stream:
        try:
            # Handle Hugging Face's streaming format
            token = chunk.choices[0].delta.content
            
            if token:  # Skip empty tokens
                response += token
                yield response
                print(f"Streamed token: {token}")
        except (AttributeError, IndexError) as e:
            # Some chunks arrive without choices or delta content; skip them.
            print(f"Error processing chunk: {e}")
            continue

    print("Completed response generation.")

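# Models offered in the dropdown. These are assumed to be reachable through
# the Hugging Face Inference API with the configured token; swap in any other
# chat-capable model ID as needed.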
models = [
    #"microsoft/Phi-4-mini-instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
    "PowerInfer/SmallThinker-3B-Preview",
    "Qwen/QwQ-32B-Preview",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "microsoft/Phi-3-mini-128k-instruct",
]

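# Build the Gradio UI: a model selector, the chat area, and a collapsible
# Configuration panel for prompt selection and sampling parameters.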
with gr.Blocks() as demo:
    gr.Markdown("# LLM Test")
    
    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=models, 
            value=models[0], 
            label="Select Model:"
        )

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(
        show_label=False,
        placeholder="Enter text and press enter",
        container=False
    )
    clear = gr.Button("Clear")

    with gr.Accordion("Configuration", open=False):
        preset_prompt = gr.Dropdown(
            choices=list(SYSTEM_PROMPTS.keys()),
            value=list(SYSTEM_PROMPTS.keys())[0],
            label="Select System Prompt:"
        )
        custom_prompt = gr.Textbox(
            value="",
            label="Custom System Prompt (leaves blank to use preset):",
            lines=2
        )
        max_tokens = gr.Slider(
            minimum=1,
            maximum=8192,
            value=2048,
            step=1,
            label="Max new tokens:"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.3,
            step=0.1,
            label="Temperature:"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P:"
        )

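    # Append the new user turn (assistant reply pending) and clear the textbox.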
    def user(user_message, history):
        return "", history + [[user_message, None]]

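    # Stream the assistant reply into the last history entry by delegating to
    # respond(); each yield re-renders the Chatbot with the partial text.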
    def bot(
        history,
        preset_prompt,
        custom_prompt,
        max_tokens,
        temperature,
        top_p,
        model_name
    ):
        history[-1][1] = ""
        for character in respond(
            history[-1][0],
            history[:-1],
            preset_prompt,
            custom_prompt,
            max_tokens,
            temperature,
            top_p,
            model_name
        ):
            history[-1][1] = character
            yield history

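    # Two-step event chain: user() echoes the message immediately (unqueued),
    # then bot() streams the model's reply into the same history entry.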
    msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, preset_prompt, custom_prompt, max_tokens, temperature, top_p, model_dropdown],
        chatbot
    )

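    # Reset the conversation without going through the request queue.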
    clear.click(lambda: None, None, chatbot, queue=False)

print("Gradio interface initialized.")

if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch()