ffreemt committed
Commit 89b26fd · 1 Parent(s): bd9d50e

Update remove mpt-20 reference

Files changed (1):
  1. app.py +12 -12
app.py CHANGED
@@ -148,7 +148,7 @@ class GenerationConfig:
 
 
 def format_prompt(system_prompt: str, user_prompt: str):
-    """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
+    """Format prompt based on: https://huggingface.co/spaces/mosaicml/m pt-30b-chat/blob/main/app.py."""
     # TODO im_start/im_end possible fix for WizardCoder
 
     system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
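For context, this hunk edits the docstring of the ChatML-style prompt builder. Filled out past the one line the diff shows, the function presumably looks something like this minimal sketch (the user and assistant segments are assumptions based on the im_start/im_end convention the TODO mentions):

def format_prompt(system_prompt: str, user_prompt: str) -> str:
    """Assemble a ChatML-style prompt (sketch; only the system line appears in the diff)."""
    system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
    user_prompt = f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
    # Leave the assistant tag open so the model completes from here.
    return f"{system_prompt}{user_prompt}<|im_start|>assistant\n"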
@@ -176,22 +176,22 @@ def generate(
 
 
     logger.info("start dl")
-    _ = """full url: https://huggingface.co/TheBloke/mpt-30B-chat-GGML/blob/main/mpt-30b-chat.ggmlv0.q4_1.bin"""
+    _ = """full url: https://huggingface.co/TheBloke/m pt-30B-chat-GGML/blob/main/m pt-30b-chat.ggmlv0.q4_1.bin"""
 
-    # https://huggingface.co/TheBloke/mpt-30B-chat-GGML
+    # https://huggingface.co/TheBloke/m pt-30B-chat-GGML
     _ = """
-    mpt-30b-chat.ggmlv0.q4_0.bin q4_0 4 16.85 GB 19.35 GB 4-bit.
-    mpt-30b-chat.ggmlv0.q4_1.bin q4_1 4 18.73 GB 21.23 GB 4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
-    mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
-    mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
-    mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
+    m pt-30b-chat.ggmlv0.q4_0.bin q4_0 4 16.85 GB 19.35 GB 4-bit.
+    m pt-30b-chat.ggmlv0.q4_1.bin q4_1 4 18.73 GB 21.23 GB 4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
+    m pt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
+    m pt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
+    m pt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
     """
     MODEL_FILENAME = "m pt-30b-chat.ggmlv0.q4_1.bin"
     MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin" # 10.7G
     MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
     DESTINATION_FOLDER = "models"
 
-    REPO_ID = "TheBloke/mpt-30B-chat-GGML"
+    REPO_ID = "TheBloke/m pt-30B-chat-GGML"
     if "WizardCoder" in MODEL_FILENAME:
         REPO_ID = "TheBloke/WizardCoder-15B-1.0-GGML"
 
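The hunk above picks one quantized GGML file (the later MODEL_FILENAME assignments win, so the q4_1 WizardCoder file is the one actually fetched) and hands it to download_quant. A minimal sketch of what such a helper might do, assuming a recent huggingface_hub; the real implementation lives elsewhere in app.py and may differ:

from huggingface_hub import hf_hub_download

def download_quant(destination_folder: str, repo_id: str, model_filename: str) -> str:
    # Fetch a single file from the Hub repo into destination_folder and
    # return its local path; previously downloaded copies are typically reused.
    return hf_hub_download(
        repo_id=repo_id,
        filename=model_filename,
        local_dir=destination_folder,
    )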
@@ -200,7 +200,7 @@ download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
     logger.info("done dl")
 
     # if "mpt" in model_filename:
-    #     config = AutoConfig.from_pretrained("mosaicml/mpt-30b-cha t", context_length=8192)
+    #     config = AutoConfig.from_pretrained("mosaicml/m pt-30b-cha t", context_length=8192)
     #     llm = AutoModelForCausalLM.from_pretrained(
     #         os.path.abspath(f"models/{model_filename}"),
     #         model_type="mpt",
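The mpt branch stays commented out; for the WizardCoder file that is actually downloaded, the equivalent ctransformers load would look roughly like this sketch (not lines from the repo: WizardCoder-15B is a StarCoder derivative, hence model_type="starcoder", and the threads value is an assumption to tune per host):

from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "models/WizardCoder-15B-1.0.ggmlv3.q4_1.bin",
    model_type="starcoder",  # WizardCoder-15B uses the StarCoder architecture
    threads=8,  # assumption: set to the number of physical CPU cores
)
print(llm("def fib(n):", max_new_tokens=64))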
@@ -264,14 +264,14 @@ css = """
 """
 
 with gr.Blocks(
-    # title="mpt-30b-chat-ggml",
+    # title="m pt-30b-chat-ggml",
     title=f"{MODEL_FILENAME}",
     theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
     css=css,
 ) as block:
     with gr.Accordion("🎈 Info", open=False):
         # gr.HTML(
-        #     """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
+        #     """<center><a href="https://huggingface.co/spaces/mikeee/m pt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
         # )
         gr.Markdown(
             f"""<h4><center>{MODEL_FILENAME}</center></h4>