wb-droid committed on
Commit b4eccd0 · 1 Parent(s): 312c695

first commit.

Files changed (3):
  1. app.py +92 -0
  2. chatglm3-ggml.bin +3 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,92 @@
+ # Reference:
+ # https://github.com/li-plus/chatglm.cpp
+ # https://github.com/li-plus/chatglm.cpp/blob/main/examples/web_demo.py
+
+ import chatglm_cpp
+ import gradio as gr
+ import argparse  # retained from the upstream demo; unused here
+ from pathlib import Path  # retained from the upstream demo; unused here
+
+ # Load the quantized ChatGLM3 GGML model shipped with this Space.
+ pipeline = chatglm_cpp.Pipeline("./chatglm3-ggml.bin")
+
+ # Generation defaults; max_length, top_p, and temperature can be
+ # overridden from the UI sliders below.
+ max_length = 2048
+ top_p = 0.4
+ temp = 0.95
+ max_context_length = 512
+ mode = "chat"
+ top_k = 0
+ repeat_penalty = 1.0
+ threads = 0  # 0 lets chatglm.cpp choose the thread count
+
+ def postprocess(text):
+     # if args.plain:
+     #     return f"<pre>{text}</pre>"
+     return text
+
+ def predict(input, chatbot, max_length, top_p, temperature, messages):
+     chatbot.append((postprocess(input), ""))
+     messages.append(chatglm_cpp.ChatMessage(role="user", content=input))
+
+     generation_kwargs = dict(
+         max_length=max_length,
+         max_context_length=max_context_length,
+         do_sample=temperature > 0,
+         top_k=top_k,
+         top_p=top_p,
+         temperature=temperature,
+         repetition_penalty=repeat_penalty,
+         num_threads=threads,
+         stream=True,
+     )
+
+     response = ""
+     if mode == "chat":
+         # Stream partial chat messages, updating the last chatbot turn
+         # as tokens arrive, then fold the chunks back into the history.
+         chunks = []
+         for chunk in pipeline.chat(messages, **generation_kwargs):
+             response += chunk.content
+             chunks.append(chunk)
+             chatbot[-1] = (chatbot[-1][0], postprocess(response))
+             yield chatbot, messages
+         messages.append(pipeline.merge_streaming_messages(chunks))
+     else:
+         # Plain completion mode: generate() yields raw text chunks.
+         for chunk in pipeline.generate(input, **generation_kwargs):
+             response += chunk
+             chatbot[-1] = (chatbot[-1][0], postprocess(response))
+             yield chatbot, messages
+
+     yield chatbot, messages
+
+ def reset_user_input():
+     return gr.update(value="")
+
+ def reset_state():
+     return [], []
+
+ with gr.Blocks() as demo:
+     gr.HTML("""<h1 align="center">ChatGLM3 quantized with chatglm.cpp: model size reduced from 12 GB to 3.4 GB.</h1>""")
+
+     chatbot = gr.Chatbot()
+     with gr.Row():
+         with gr.Column(scale=4):
+             user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=8)
+             submitBtn = gr.Button("Submit", variant="primary")
+         with gr.Column(scale=1):
+             max_length = gr.Slider(0, 2048, value=max_length, step=1.0, label="Maximum Length", interactive=True)
+             top_p = gr.Slider(0, 1, value=top_p, step=0.01, label="Top P", interactive=True)
+             temperature = gr.Slider(0, 1, value=temp, step=0.01, label="Temperature", interactive=True)
+             emptyBtn = gr.Button("Clear History")
+
+     messages = gr.State([])
+
+     submitBtn.click(
+         predict,
+         [user_input, chatbot, max_length, top_p, temperature, messages],
+         [chatbot, messages],
+         show_progress=True,
+     )
+     submitBtn.click(reset_user_input, [], [user_input])
+
+     emptyBtn.click(reset_state, outputs=[chatbot, messages], show_progress=True)
+
+ demo.queue().launch(share=False, inbrowser=True)
chatglm3-ggml.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8ad2e7e172102949f9e4930b7fcf4bd88ed1a32307e69f36ce5f1571e4ac5a81
+ size 3514297136
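Note that this entry is only a Git LFS pointer; the actual weights are about 3.4 GB (3,514,297,136 bytes). The file was presumably produced with chatglm.cpp's conversion script. A sketch of the usual invocation, assuming q4_0 quantization (the quantization type is not recorded in this commit, but q4_0 is consistent with the file size for a 6B model):

    python3 chatglm_cpp/convert.py -i THUDM/chatglm3-6b -t q4_0 -o chatglm3-ggml.bin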
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ torch
+ tabulate
+ tqdm
+ transformers
+ accelerate
+ sentencepiece
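Note that app.py also imports chatglm_cpp and gradio, neither of which is pinned here; on a Hugging Face Space, gradio is typically supplied by the Space SDK, while chatglm-cpp would normally need to be installed separately, e.g.:

    pip install -U chatglm-cpp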