import gradio as gr
import requests
import os

# Retrieve the Hugging Face token from environment variables
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
if not HF_TOKEN:
    raise ValueError("Please set your Hugging Face API token as HUGGINGFACE_TOKEN in the Secrets settings.")

# Model details
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"  # Exact model ID on the Hugging Face Hub
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"

# Headers for API requests
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json"
}
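
# For reference, the request this app sends is equivalent to the following
# (assuming a text-generation model on the serverless Inference API; the
# prompt below is illustrative):
#
#   curl https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-3B-Instruct \
#     -H "Authorization: Bearer $HUGGINGFACE_TOKEN" \
#     -H "Content-Type: application/json" \
#     -d '{"inputs": "Hello", "parameters": {"max_new_tokens": 32}}'
#
# A successful response is a JSON list of the form [{"generated_text": "..."}].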

def chat_with_llama(prompt, temperature=0.7, max_tokens=256):
    """Sends a request to the Hugging Face Inference API and returns the response."""
    payload = {
        "inputs": prompt,
        "parameters": {
            "temperature": temperature,
            "max_new_tokens": max_tokens,
            "top_p": 0.95,
            "return_full_text": False  # Return only the completion, not the echoed prompt
        }
    }
    response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=120)
    if response.status_code == 200:
        # Text-generation responses arrive as a JSON list: [{"generated_text": "..."}]
        return response.json()[0]["generated_text"]
    else:
        return f"Error {response.status_code}: {response.text}"

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("<h2 align='center'>🦙 Llama 3.2 3B Instruct Chatbot</h2>")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Enter your prompt:", placeholder="Ask me anything...", lines=3)
            temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
            max_tokens = gr.Slider(50, 1024, value=256, label="Max Tokens")
            submit = gr.Button("Generate Response")
        with gr.Column():
            output = gr.Textbox(label="AI Response", interactive=False, lines=10)
    submit.click(chat_with_llama, inputs=[prompt, temperature, max_tokens], outputs=output)

# Launch app
if __name__ == "__main__":
    demo.launch()
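
# To run locally (the two pip packages below are the app's only dependencies;
# versions are unpinned here):
#   pip install gradio requests
#   export HUGGINGFACE_TOKEN=hf_...   # your Hugging Face API token
#   python app.py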