"""Minimal Gradio demo that streams text generation from a hosted model.

For more information on `huggingface_hub` Inference API support, please
check the docs:
https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

import gradio as gr
from huggingface_hub import InferenceClient

# Hosted model used for generation; swap the repo id to try another
# instruct model (e.g. "meta-llama/Llama-3.2-3B-Instruct",
# "HuggingFaceH4/zephyr-7b-beta", "google/gemma-2-2b-it").
client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")


def greet(prompt):
    """Stream a completion for *prompt*, yielding the growing text.

    Yields the accumulated response after each streamed token so the
    Gradio UI updates incrementally.
    """
    response = ""
    for token in client.text_generation(
        prompt,
        stream=True,
        max_new_tokens=128,
        temperature=0.7,
        top_p=0.95,
    ):
        response += token
        yield response


demo = gr.Interface(
    fn=greet,
    inputs=[gr.Textbox(label="prompt", value="The huggingface_hub library is ")],
    outputs=[gr.Textbox(label="result", lines=3)],
    api_name="generate",
    clear_btn=None,  # hide the Clear button
)

if __name__ == "__main__":
    demo.launch()