import gradio as gr
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

"""
client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct")
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
client = InferenceClient("google/gemma-2-2b-it")
client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
"""

client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")

def generate(prompt):
    """Stream the model's completion, yielding the growing partial response."""
    response = ""
    # Stream tokens one at a time so the UI updates as text is generated.
    for token in client.text_generation(
        prompt,
        stream=True,
        max_new_tokens=128,
        temperature=0.7,
        top_p=0.95,
    ):
        response += token
        yield response
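
# Because generate() yields partial strings, Gradio streams each one into the
# output textbox. A direct-use sketch (illustrative prompt, not from this app):
#   for partial in generate("Write a function that reverses a string."):
#       print(partial)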

demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="prompt", value="The huggingface_hub library is ")
    ],
    outputs=[gr.Textbox(label="result", lines=3)],
    api_name="generate",
    clear_btn=None
)

if __name__ == "__main__":
    demo.launch()
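
# With api_name="generate", the endpoint can also be called programmatically once
# the app is running. A sketch assuming a local launch and the gradio_client package:
#   from gradio_client import Client
#   result = Client("http://127.0.0.1:7860").predict(
#       "The huggingface_hub library is ", api_name="/generate"
#   )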