import gradio as gr
import requests
import os

# Retrieve the Hugging Face token from environment variables
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
if not HF_TOKEN:
    raise ValueError("Please set your Hugging Face API token as HUGGINGFACE_TOKEN in the Secrets settings.")

# Model details
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"  # Exact model ID on the Hugging Face Hub
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"

# Headers for API requests
HEADERS = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json"
}
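
# For reference, the request this app sends is equivalent to the following
# (assuming a text-generation model on the serverless Inference API; the
# prompt below is illustrative):
#
#   curl https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-3B-Instruct \
#     -H "Authorization: Bearer $HUGGINGFACE_TOKEN" \
#     -H "Content-Type: application/json" \
#     -d '{"inputs": "Hello", "parameters": {"max_new_tokens": 32}}'
#
# A successful response is a JSON list of the form [{"generated_text": "..."}].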

def chat_with_llama(prompt, temperature=0.7, max_tokens=256):
    """Sends a request to the Hugging Face Inference API and returns the response."""
    payload = {
        "inputs": prompt,
        "parameters": {
            "temperature": temperature,
            "max_new_tokens": max_tokens,
            "top_p": 0.95,
            "return_full_text": False  # Return only the completion, not the echoed prompt
        }
    }
    response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=120)
    if response.status_code == 200:
        # Text-generation responses arrive as a JSON list: [{"generated_text": "..."}]
        return response.json()[0]["generated_text"]
    else:
        return f"Error {response.status_code}: {response.text}"

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("<h2 align='center'>🦙 Llama 3.2 3B Instruct Chatbot</h2>")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Enter your prompt:", placeholder="Ask me anything...", lines=3)
            temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
            max_tokens = gr.Slider(50, 1024, value=256, label="Max Tokens")
            submit = gr.Button("Generate Response")
        with gr.Column():
            output = gr.Textbox(label="AI Response", interactive=False, lines=10)
    submit.click(chat_with_llama, inputs=[prompt, temperature, max_tokens], outputs=output)

# Launch app
if __name__ == "__main__":
    demo.launch()
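
# To run locally (the two pip packages below are the app's only dependencies;
# versions are unpinned here):
#   pip install gradio requests
#   export HUGGINGFACE_TOKEN=hf_...   # your Hugging Face API token
#   python app.py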