john committed on
Commit
a9e99a7
·
1 Parent(s): 1bf80aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -15
app.py CHANGED
@@ -4,36 +4,42 @@ import gradio as gr
4
  from llama_cpp import Llama
5
  import random
6
 
7
- url = 'https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q2_K.bin'
8
  filename = wget.download(url)
9
  llm = Llama(model_path=filename, seed=random.randint(1, 2**31))
10
-
11
- with gr.Blocks() as demo:
 
 
 
 
 
 
12
  chatbot = gr.Chatbot()
13
  msg = gr.Textbox()
14
- clear = gr.UploadButton([msg, chatbot]) # Replace gr.ClearButton with gr.UploadButton
15
- # instruction = gr.Textbox(label="Instruction", placeholder="")
16
 
17
  def user(user_message, history):
18
  return gr.update(value="", interactive=True), history + [[user_message, None]]
19
 
20
  def bot(history):
21
- # instruction = history[-1][1] or ""
22
  user_message = history[-1][0]
23
- # token1 = llm.tokenize(b"### Instruction: ")
24
- # token2 = llm.tokenize(instruction.encode())
25
- token3 = llm.tokenize(b"### Input: ")
26
- tokens3 = llm.tokenize(user_message.encode())
27
- token4 = llm.tokenize(b"### Response:")
28
- tokens = token3 + tokens3 + token4
29
  history[-1][1] = ""
30
  count = 0
31
  output = ""
32
- for token in llm.generate(tokens, top_k=50, top_p=0.73, temp=0.72, repeat_penalty=1.1):
33
- text = llm.detokenize([token])
34
  output += text.decode()
35
  count += 1
36
- if count >= 500 or (token == llm.token_eos()):
37
  break
38
  history[-1][1] += text.decode()
39
  yield history
 
4
  from llama_cpp import Llama
5
  import random
6
 
7
+ url = 'https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GGML/resolve/main/WizardLM-7B-uncensored.ggmlv3.q4_0.bin'
8
  filename = wget.download(url)
9
  llm = Llama(model_path=filename, seed=random.randint(1, 2**31))
10
+ theme = gr.themes.Soft(
11
+ primary_hue=gr.themes.Color("#ededed", "#fee2e2", "#fecaca", "#fca5a5", "#f87171", "#ef4444", "#dc2626", "#b91c1c", "#991b1b", "#7f1d1d", "#6c1e1e"),
12
+ neutral_hue="red",
13
+ )
14
+ title = """<h1 align="center">Chat with awesome WizardLM 7b model!</h1><br><h2 align="center">Llama cpp + Gradio!</h2>"""
15
+ with gr.Blocks(theme=theme) as demo:
16
+ gr.HTML(title)
17
+ gr.HTML("This model is awesome for its size! It is only 20th the size of Chatgpt but is around 90% as good as Chatgpt. However, please don't rely on WizardLM 7b to provide 100% true information as it might be wrong sometimes. ")
18
  chatbot = gr.Chatbot()
19
  msg = gr.Textbox()
20
+ clear = gr.ClearButton([msg, chatbot])
21
+ #instruction = gr.Textbox(label="Instruction", placeholder=)
22
 
23
  def user(user_message, history):
24
  return gr.update(value="", interactive=True), history + [[user_message, None]]
25
 
26
  def bot(history):
27
+ #instruction = history[-1][1] or ""
28
  user_message = history[-1][0]
29
+ #token1 = llm.tokenize(b"### Instruction: ")
30
+ #token2 = llm.tokenize(instruction.encode())
31
+ #token3 = llm2.tokenize(b"USER: ")
32
+ tokens3 = llm2.tokenize(user_message.encode())
33
+ token4 = llm2.tokenize(b"\n\n### Response:")
34
+ tokens = tokens3 + token4
35
  history[-1][1] = ""
36
  count = 0
37
  output = ""
38
+ for token in llm2.generate(tokens, top_k=50, top_p=0.73, temp=0.72, repeat_penalty=1.1):
39
+ text = llm2.detokenize([token])
40
  output += text.decode()
41
  count += 1
42
+ if count >= 500 or (token == llm2.token_eos()):
43
  break
44
  history[-1][1] += text.decode()
45
  yield history