Gumelar Teja Sukma committed on
Commit
4ac56a3
·
1 Parent(s): 4008d71
Files changed (2) hide show
  1. app.py +56 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer
4
+ from auto_gptq import AutoGPTQForCausalLM
5
+
6
# Load model & tokenizer
# Both the tokenizer and the GPTQ-quantized weights are fetched from the same
# Hugging Face repo id.
model_name_or_path = "TheBloke/Llama-2-7B-Chat-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    use_fast=True,
)

# Keyword options for the quantized checkpoint loader, kept in one place so
# they are easy to audit.
# NOTE(review): trust_remote_code=True permits running code shipped with the
# model repo -- confirm it is actually required for this checkpoint.
_quantized_load_options = {
    "model_basename": "model",
    "device_map": "auto",  # Auto-detects GPU/CPU
    "use_safetensors": True,
    "trust_remote_code": True,
}

model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    **_quantized_load_options,
)
17
+
18
# Prompt template
# System block in Llama-2 chat markup; it is placed inside the first [INST]
# segment of the prompt.
SYSTEM_PROMPT = "<<SYS>>\nKamu adalah asisten AI yang santuy dan suka ngoding.\n<</SYS>>\n\n"


def build_prompt(history, max_history_turns=5):
    """Build a Llama-2 chat prompt from the conversation history.

    Earlier turns are rendered as ``<s>[INST] user [/INST] assistant </s>``
    and the final turn as an open ``<s>[INST] user [/INST]`` for the model to
    complete. The system prompt is attached to the first rendered turn only.
    With a single-turn history the result is identical to the previous
    implementation.

    Args:
        history: Non-empty sequence of ``[user, assistant]`` pairs. The last
            pair holds the message to answer; its assistant slot is ignored.
        max_history_turns: Maximum number of *earlier* turns to include, so
            the prompt does not grow without bound. ``None`` keeps them all;
            ``0`` reproduces the old single-turn behaviour.

    Returns:
        The prompt string.

    Raises:
        IndexError: If ``history`` is empty.
    """
    if not history:
        raise IndexError("history must contain at least one [user, assistant] turn")

    *previous, current = history
    if max_history_turns is not None:
        # A plain ``previous[-0:]`` would keep everything, hence the guard.
        previous = previous[-max_history_turns:] if max_history_turns > 0 else []

    segments = []
    for index, turn in enumerate(previous):
        system = SYSTEM_PROMPT if index == 0 else ""
        # The assistant slot may be None/empty for turns that never completed.
        answer = str(turn[1] or "").strip()
        segments.append(f"<s>[INST] {system}{turn[0]} [/INST] {answer} </s>")

    system = "" if previous else SYSTEM_PROMPT
    segments.append(f"<s>[INST] {system}{current[0]} [/INST]")
    return "".join(segments)
24
+
25
def chat(user_input, chat_history):
    """Generate a reply to ``user_input`` and record the turn in the history.

    Args:
        user_input: Text submitted from the message textbox.
        chat_history: List of ``[user, assistant]`` pairs held in the
            session state; a falsy value is treated as an empty history.

    Returns:
        A ``(history, history)`` tuple: the same updated list is returned
        twice because the caller wires it to both the chatbot display and
        the state component.
    """
    chat_history = chat_history or []

    # Nothing to answer: leave the conversation untouched instead of
    # spending a generation call on a blank prompt.
    if not user_input or not user_input.strip():
        return chat_history, chat_history

    prompt = build_prompt(chat_history + [[user_input, ""]])
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded.input_ids.to(model.device)
    attention_mask = encoded.attention_mask.to(model.device)

    output_ids = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
    )

    # The generated sequence starts with the prompt tokens. Decode only what
    # follows them, rather than splitting the decoded text on "[/INST]",
    # which would cut off a reply that happens to contain that marker.
    new_token_ids = output_ids[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

    chat_history.append([user_input, response])
    return chat_history, chat_history
44
+
45
# Gradio UI
# Layout: a chatbot pane, a textbox for the user's message, and a button that
# resets the conversation. `state` holds the [user, assistant] history that
# `chat` reads and returns.
with gr.Blocks(title="Ujang v3 Chatbot") as demo:
    gr.Markdown("### 🤖 Ujang v3 - LLaMA 2 Chatbot GPTQ")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Ketik pesan:")
    clear = gr.Button("🧹 Bersihkan")
    state = gr.State([])

    # Submitting the textbox runs `chat`; its two return values refresh the
    # chatbot display and the stored history.
    msg.submit(chat, [msg, state], [chatbot, state])
    # The clear button empties both the display and the stored history.
    clear.click(lambda: ([], []), None, [chatbot, state])

# Only start the server when executed as a script, so importing this module
# does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# Index options are file-level in a pip requirements file; they must sit on
# their own line rather than after a requirement specifier.
--extra-index-url https://huggingface.github.io/autogptq-index/whl/cpu/

torch>=2.0.0
gradio>=3.0.0
transformers>=4.30.0
auto-gptq>=0.4.0
ninja>=1.11.0
accelerate>=0.20.0
bitsandbytes>=0.40.0