"""Minimal Gradio demo that streams text generation from a hosted model.

For more information on `huggingface_hub` Inference API support, please
check the docs:
https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

import gradio as gr
from huggingface_hub import InferenceClient

# Hosted model used for generation; swap the repo id to try another
# instruct model (e.g. "meta-llama/Llama-3.2-3B-Instruct",
# "HuggingFaceH4/zephyr-7b-beta", "google/gemma-2-2b-it").
client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")


def greet(prompt):
    """Stream a completion for *prompt*, yielding the growing text.

    Yields the accumulated response after each streamed token so the
    Gradio UI updates incrementally.
    """
    response = ""
    for token in client.text_generation(
        prompt,
        stream=True,
        max_new_tokens=128,
        temperature=0.7,
        top_p=0.95,
    ):
        response += token
        yield response


demo = gr.Interface(
    fn=greet,
    inputs=[gr.Textbox(label="prompt", value="The huggingface_hub library is ")],
    outputs=[gr.Textbox(label="result", lines=3)],
    api_name="generate",
    clear_btn=None,  # hide the Clear button
)

if __name__ == "__main__":
    demo.launch()