# DStv AI Assistant — Gradio chat app serving a quantized Gemma 2B (GGUF)
# model locally via llama-cpp-python. Model weights are pulled from the
# Hugging Face Hub on startup.
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Download the GGUF model file from the Hugging Face Hub.
# hf_hub_download caches locally, so repeat launches skip the download.
MODEL_REPO = "Futuresony/gemma2-2b-gguf-q4_k_m"
MODEL_FILENAME = "gemma-2b-it-q4_k_m.gguf"  # Or check exact filename on the repo

model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)

# Load the model for CPU inference.
# n_ctx=2048: context window; n_threads=4: CPU threads — tune per host.
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4, verbose=True)
# Format prompt as Alpaca-style
def format_prompt(user_message: str) -> str:
    """Wrap *user_message* in the Alpaca instruction template.

    The generation call stops on "###", so the model's completion after
    "### Response:" is the assistant reply.
    """
    return f"""### Instruction:
{user_message}
### Response:"""
# Chat handler
def respond(user_message, chat_history):
    """Generate a reply for *user_message* and append the turn to history.

    Args:
        user_message: Raw text from the input textbox.
        chat_history: Mutable list of (user, assistant) tuples held in
            ``gr.State``; mutated in place.

    Returns:
        ("", chat_history): empty string clears the textbox; the updated
        history re-renders the Chatbot component.
    """
    prompt = format_prompt(user_message)
    # stop=["###"] ends generation at the next Alpaca section marker.
    output = llm(prompt, max_tokens=300, stop=["###"])
    response = output["choices"][0]["text"].strip()
    chat_history.append((user_message, response))
    return "", chat_history
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 DStv AI Assistant - Powered by Gemma 2B GGUF")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask your question...")
    # Conversation history lives in per-session state, not a global.
    state = gr.State([])
    # On Enter: respond(msg, state) -> clears the textbox, updates the chat.
    msg.submit(respond, [msg, state], [msg, chatbot])

demo.launch()