"""Gradio chatbot front-end for the Hugging Face Inference API.

Streams chat completions from a user-selected hosted model. The HF API
token is read from the ``HF_TOKEN`` environment variable by default and
can be overridden in the UI.
"""

import os
from typing import Generator, List, Tuple

import gradio as gr
from huggingface_hub import InferenceClient

# Models offered in the UI dropdown.
AVAILABLE_MODELS = [
    "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
    "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0",
    "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored",
    "VIDraft/Gemma-3-R1984-12B",
]


def initialize_client(token: str, model: str) -> InferenceClient:
    """Create an ``InferenceClient`` for *model* authenticated with *token*.

    Args:
        token: Hugging Face API token.
        model: Model repo id to run inference against.

    Returns:
        A ready-to-use ``InferenceClient``.

    Raises:
        gr.Error: If client construction fails (surfaced in the Gradio UI).
    """
    try:
        # Pass the token directly instead of calling login(): login()
        # mutates global auth state on every request, while a per-client
        # token keeps the credential scoped to this client instance.
        return InferenceClient(model=model, token=token)
    except Exception as e:
        # Raise (not return) the error so callers don't need an
        # isinstance check; chain the cause for debuggability.
        raise gr.Error(f"Failed to initialize client: {e}") from e


def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    model: str,
    token: str,
) -> Generator[str, None, None]:
    """Stream a chat completion for *message* given the conversation *history*.

    Yields the accumulated response text after each streamed chunk so the
    Gradio UI can render it incrementally.

    Args:
        message: The new user message.
        history: Prior (user, assistant) turn pairs from ``gr.ChatInterface``.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        model: Model repo id selected in the UI.
        token: Hugging Face API token.

    Yields:
        The response text accumulated so far.

    Raises:
        gr.Error: On missing token, empty input, client setup failure,
            or any inference-time error.
    """
    if not token:
        raise gr.Error("Please provide a valid Hugging Face API token.")
    if not message.strip():
        raise gr.Error("Input message cannot be empty.")

    # initialize_client raises gr.Error itself on failure.
    client = initialize_client(token, model)

    # Rebuild the full message list: system prompt, prior turns, new message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Use a distinct name for the streamed text delta; the original
            # code shadowed the `token` (API credential) parameter here.
            delta = chunk.choices[0].delta.content or ""
            response += delta
            yield response
    except Exception as e:
        raise gr.Error(f"Error during inference: {e}") from e


# Load token from environment variable for security.
HF_TOKEN = os.getenv("HF_TOKEN", "")

# Create Gradio interface.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly and helpful Chatbot.",
            label="System Message",
            placeholder="Enter the system prompt here...",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max New Tokens",
            info="Controls the maximum length of the generated response.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Controls randomness (higher = more creative, lower = more deterministic).",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (Nucleus Sampling)",
            info="Controls diversity via nucleus sampling.",
        ),
        gr.Dropdown(
            choices=AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],
            label="Model Selection",
            info="Select the model to use for inference.",
        ),
        gr.Textbox(
            value=HF_TOKEN,
            label="Hugging Face API Token",
            type="password",
            placeholder="Enter your HF API token (or set HF_TOKEN env variable)",
        ),
    ],
    title="Chatbot with Hugging Face Inference API",
    description="Interact with a chatbot powered by Hugging Face models. Provide your API token and customize settings.",
    theme="base",
)

if __name__ == "__main__":
    demo.launch()