import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Download GGUF model from Hugging Face Hub
MODEL_REPO = "Futuresony/gemma2-2b-gguf-q4_k_m"
MODEL_FILENAME = "gemma-2b-it-q4_k_m.gguf" # Or check exact filename on the repo
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
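# The exact GGUF filename can vary between uploads; if the download 404s,
# list the repo's files to find it (list_repo_files is part of huggingface_hub):
#   from huggingface_hub import list_repo_files
#   print(list_repo_files(MODEL_REPO))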
# Load the model (2048-token context window, 4 CPU threads)
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4, verbose=True)
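# If llama-cpp-python was built with GPU support, layers can be offloaded
# via the n_gpu_layers argument (e.g. Llama(..., n_gpu_layers=-1) for all layers).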
# Format the prompt in Alpaca style
def format_prompt(user_message):
    return f"""### Instruction:
{user_message}
### Response:"""
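# Example: format_prompt("What channels carry football?") yields:
#
#   ### Instruction:
#   What channels carry football?
#   ### Response: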
# Chat handler
def respond(user_message, chat_history):
    prompt = format_prompt(user_message)
    output = llm(prompt, max_tokens=300, stop=["###"])
    response = output["choices"][0]["text"].strip()
    # Appending in place also updates the gr.State list between turns.
    chat_history.append((user_message, response))
    return "", chat_history
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 DStv AI Assistant - Powered by Gemma 2B GGUF")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask your question...")
    state = gr.State([])
    msg.submit(respond, [msg, state], [msg, chatbot])

demo.launch()
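# To run outside the Space (assumes the three imported packages are installed):
#   pip install gradio llama-cpp-python huggingface_hub
#   python app.py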