# DStv AI Assistant — Gradio chat app serving a quantized Gemma 2B (GGUF)
# model locally via llama-cpp-python. Model weights are pulled from the
# Hugging Face Hub on startup.
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download the GGUF model file from the Hugging Face Hub.
# hf_hub_download caches locally, so repeat launches skip the download.
MODEL_REPO = "Futuresony/gemma2-2b-gguf-q4_k_m"
MODEL_FILENAME = "gemma-2b-it-q4_k_m.gguf"  # Or check exact filename on the repo

model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)

# Load the model for CPU inference.
# n_ctx=2048: context window; n_threads=4: CPU threads — tune per host.
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4, verbose=True)
# Format prompt as Alpaca-style
def format_prompt(user_message: str) -> str:
    """Wrap *user_message* in the Alpaca instruction template.

    The generation call stops on "###", so the model's completion after
    "### Response:" is the assistant reply.
    """
    return f"""### Instruction:
{user_message}
### Response:"""
# Chat handler
def respond(user_message, chat_history):
    """Generate a reply for *user_message* and append the turn to history.

    Args:
        user_message: Raw text from the input textbox.
        chat_history: Mutable list of (user, assistant) tuples held in
            ``gr.State``; mutated in place.

    Returns:
        ("", chat_history): empty string clears the textbox; the updated
        history re-renders the Chatbot component.
    """
    prompt = format_prompt(user_message)
    # stop=["###"] ends generation at the next Alpaca section marker.
    output = llm(prompt, max_tokens=300, stop=["###"])
    response = output["choices"][0]["text"].strip()
    chat_history.append((user_message, response))
    return "", chat_history
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 DStv AI Assistant - Powered by Gemma 2B GGUF")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask your question...")
    # Conversation history lives in per-session state, not a global.
    state = gr.State([])
    # On Enter: respond(msg, state) -> clears the textbox, updates the chat.
    msg.submit(respond, [msg, state], [msg, chatbot])

demo.launch()