import gradio as gr
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

"""
client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct")
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
client = InferenceClient("google/gemma-2-2b-it")
client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
"""

client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")

def generate(prompt):
    """Stream the model's completion, yielding the growing partial response."""
    response = ""
    # Stream tokens one at a time so the UI updates as text is generated.
    for token in client.text_generation(
        prompt,
        stream=True,
        max_new_tokens=128,
        temperature=0.7,
        top_p=0.95,
    ):
        response += token
        yield response
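
# Because generate() yields partial strings, Gradio streams each one into the
# output textbox. A direct-use sketch (illustrative prompt, not from this app):
#   for partial in generate("Write a function that reverses a string."):
#       print(partial)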

demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="prompt", value="The huggingface_hub library is ")
    ],
    outputs=[gr.Textbox(label="result", lines=3)],
    api_name="generate",
    clear_btn=None
)

if __name__ == "__main__":
    demo.launch()
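
# With api_name="generate", the endpoint can also be called programmatically once
# the app is running. A sketch assuming a local launch and the gradio_client package:
#   from gradio_client import Client
#   result = Client("http://127.0.0.1:7860").predict(
#       "The huggingface_hub library is ", api_name="/generate"
#   )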