Spaces:

Gumelarteja
/

ujangv3

Runtime error

App Files Files Community

Gumelar Teja Sukma committed on Jun 3

Commit

4ac56a3

1 Parent(s): 4008d71

init

Browse files

Files changed (2) hide show

app.py +56 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import torch
+import gradio as gr
+from transformers import AutoTokenizer
+from auto_gptq import AutoGPTQForCausalLM
+# Load model & tokenizer
+model_name_or_path = "TheBloke/Llama-2-7B-Chat-GPTQ"
+tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+model = AutoGPTQForCausalLM.from_quantized(
+    model_name_or_path,
+    model_basename="model",
+    device_map="auto",  # Auto-detects GPU/CPU
+    use_safetensors=True,
+    trust_remote_code=True,
+)
+# Prompt template
+SYSTEM_PROMPT = "<<SYS>>\nKamu adalah asisten AI yang santuy dan suka ngoding.\n<</SYS>>\n\n"
+def build_prompt(history):
+    prompt = f"<s>[INST] {SYSTEM_PROMPT}{history[-1][0]} [/INST]"
+    return prompt
+def chat(user_input, chat_history):
+    if not chat_history:
+        chat_history = []
+    prompt = build_prompt(chat_history + [[user_input, ""]])
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
+    output_ids = model.generate(
+        input_ids=input_ids,
+        max_new_tokens=256,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.95,
+    )
+    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    response = generated_text.split("[/INST]")[-1].strip()
+    chat_history.append([user_input, response])
+    return chat_history, chat_history
+# Gradio UI
+with gr.Blocks(title="Ujang v3 Chatbot") as demo:
+    gr.Markdown("### 🤖 Ujang v3 - LLaMA 2 Chatbot GPTQ")
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox(label="Ketik pesan:")
+    clear = gr.Button("🧹 Bersihkan")
+    state = gr.State([])
+    msg.submit(chat, [msg, state], [chatbot, state])
+    clear.click(lambda: ([], []), None, [chatbot, state])
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+# Index options are global in a requirements file and must sit on their own line.
+--extra-index-url https://huggingface.github.io/autogptq-index/whl/cpu/
+torch>=2.0.0
+gradio>=3.0.0
+transformers>=4.30.0
+auto-gptq>=0.4.0
+ninja>=1.11.0
+accelerate>=0.20.0
+bitsandbytes>=0.40.0