Spaces:
Sleeping
Sleeping
File size: 1,050 Bytes
43a62df b6a9fb8 eece981 aee66a1 5f5fa84 2837316 23deb39 a87a344 aee66a1 5302251 30b8f0f cdd8880 30b8f0f 3f05231 cdd8880 30b8f0f cdd8880 30b8f0f 930eab4 cdd8880 930eab4 3f05231 e19b4bd 30b8f0f 43a62df 30b8f0f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import gradio as gr
from huggingface_hub import InferenceClient
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
"""
client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct")
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
client = InferenceClient("google/gemma-2-2b-it")
client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
"""
client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
def greet(prompt, max_new_tokens=128, temperature=0.7, top_p=0.95):
    """Stream a text completion for *prompt*, yielding the growing response.

    Generalized so the sampling parameters are tunable while keeping the
    original defaults, so existing callers (the Gradio UI passes only
    ``prompt``) behave exactly as before.

    Args:
        prompt: Text to complete.
        max_new_tokens: Upper bound on generated tokens (default 128).
        temperature: Sampling temperature (default 0.7).
        top_p: Nucleus-sampling probability threshold (default 0.95).

    Yields:
        The accumulated response text after each streamed token, so the
        Gradio output textbox updates incrementally.
    """
    response = ""
    for token in client.text_generation(
        prompt,
        stream=True,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
    ):
        response += token
        yield response
# Minimal Gradio UI: one prompt textbox in, one streamed-text textbox out.
demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Textbox(label="prompt", value="The huggingface_hub library is ")
    ],
    outputs=[gr.Textbox(label="result", lines=3)],
    api_name="generate",  # exposed as the /generate endpoint of the Space API
    clear_btn=None  # hide the default "Clear" button
)
if __name__ == "__main__":
demo.launch()
|