"""Minimal Gradio front end for text generation via the HF Inference API."""

import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")


def generate(prompt, temperature=0.2, max_new_tokens=30000, top_p=0.9,
             repetition_penalty=1.0):
    """Generate a completion for *prompt* using the module-level client.

    Args:
        prompt: Text passed as-is to ``client.text_generation``.
        temperature: Sampling temperature; coerced to float and floored
            at 0.01.
        max_new_tokens: Upper bound on generated tokens; coerced to int
            (UI sliders may deliver floats) and floored at 1.
        top_p: Nucleus-sampling mass; coerced to float and clamped to
            the range [0.0, 1.0].
        repetition_penalty: Coerced to float and floored at 0.01.

    Returns:
        The generated text as a string.
    """
    temperature = max(float(temperature), 0.01)
    # NOTE(review): some text-generation backends may reject top_p == 1.0;
    # confirm against the serving backend before relying on the upper bound.
    top_p = max(min(float(top_p), 1.0), 0.0)
    repetition_penalty = max(float(repetition_penalty), 0.01)
    # The API expects an integer token count; a slider value may be a float.
    max_new_tokens = max(int(max_new_tokens), 1)

    generate_kwargs = {
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        # Fixed seed: the same prompt and settings are sent with seed 42.
        "seed": 42,
    }

    # With details=False and stream=False (the defaults), text_generation
    # returns the generated text directly as a str, so it must not be
    # indexed with ["generated_text"].
    return client.text_generation(prompt, **generate_kwargs)


# Inputs map positionally onto generate(): prompt, temperature,
# max_new_tokens, top_p. repetition_penalty keeps its default.
iface = gr.Interface(
    fn=generate,
    inputs=[
        "text",
        gr.Slider(0.1, 2.0),
        gr.Slider(100, 50000, step=1),
        gr.Slider(0.1, 1.0),
    ],
    outputs="text",
    title="Text Generation",
)

iface.launch()