Update app.py
app.py CHANGED
```diff
@@ -251,8 +251,12 @@ repo_id = "TheBloke/mpt-30B-chat-GGML"
 _ = """
 mpt-30b-chat.ggmlv0.q4_0.bin q4_0 4 16.85 GB 19.35 GB 4-bit.
 mpt-30b-chat.ggmlv0.q4_1.bin q4_1 4 18.73 GB 21.23 GB 4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
+mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
+mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
+mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
 """
 model_filename = "mpt-30b-chat.ggmlv0.q4_1.bin"
+model_filename = "mpt-30b-chat.ggmlv0.q5_1.bin"
 destination_folder = "models"
 
 download_mpt_quant(destination_folder, repo_id, model_filename)
```
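This hunk extends the inline size table with the q5_0, q5_1 and q8_0 quants and switches `model_filename` from the q4_1 file to the q5_1 file (the old assignment is left in place and simply overridden by the new line). The download itself goes through `download_mpt_quant`, whose body is not part of this diff; below is a minimal sketch of what such a helper could look like, assuming it wraps `huggingface_hub.hf_hub_download` — the real implementation in app.py may differ.

```python
# Hypothetical sketch of download_mpt_quant, assuming huggingface_hub;
# the actual helper in app.py is not shown in this diff.
from pathlib import Path

from huggingface_hub import hf_hub_download


def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str) -> Path:
    """Fetch one GGML quant file from the Hub into destination_folder."""
    Path(destination_folder).mkdir(parents=True, exist_ok=True)
    local_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_filename,
        local_dir=destination_folder,
        local_dir_use_symlinks=False,  # materialize a real file, not a cache symlink
    )
    return Path(local_path)
```

Per the table above, the q5_1 file is about 22.47 GB on disk and needs roughly 24.97 GB of RAM once loaded, so on a fresh Space the download alone dominates startup time.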
```diff
@@ -261,7 +265,7 @@ logger.info("done dl")
 
 config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
 llm = AutoModelForCausalLM.from_pretrained(
-    os.path.abspath("models/
+    os.path.abspath(f"models/{model_name}"),
     model_type="mpt",
     config=config,
 )
```
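The loader now interpolates the filename instead of hard-coding the path. Note that the f-string references `model_name` while the selection above binds `model_filename`; presumably app.py defines `model_name` elsewhere. Here is a self-contained sketch of this loading step, assuming the `ctransformers` package (whose `AutoConfig`/`AutoModelForCausalLM` are what accept GGML files and `model_type` like this); the `model_name` binding and the final call are illustrative only.

```python
# Minimal sketch of the loading step above, assuming ctransformers;
# model_name is bound here for illustration.
import os

from ctransformers import AutoConfig, AutoModelForCausalLM

model_name = "mpt-30b-chat.ggmlv0.q5_1.bin"  # the quant chosen in the first hunk

# context_length=8192 raises the usable context window for the chat loop.
config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
llm = AutoModelForCausalLM.from_pretrained(
    os.path.abspath(f"models/{model_name}"),  # absolute path to the local quant file
    model_type="mpt",  # required for local GGML files, which carry no architecture tag
    config=config,
)

print(llm("User: say hi in five words.\nAssistant:", max_new_tokens=32))
```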
```diff
@@ -299,7 +303,7 @@ css = """
 """
 
 with gr.Blocks(
-    title="mpt-30b-chat-ggml",
+    title="mpt-30b-chat-ggml-5bit-1",
     theme=gr.themes.Soft(text_size="sm"),
     css=css,
 ) as block:
```
```diff
@@ -308,7 +312,7 @@ with gr.Blocks(
         """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
     )
     gr.Markdown(
-        """<h4><center>mpt-30b-chat-ggml</center></h4>
+        """<h4><center>mpt-30b-chat-ggml-5bit-1</center></h4>
 
 This demo is of [TheBloke/mpt-30B-chat-GGML](https://huggingface.co/TheBloke/mpt-30B-chat-GGML).
 
```
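The last two hunks only rename the UI: the browser-tab `title` passed to `gr.Blocks` and the `<h4>` heading in the first `gr.Markdown` both pick up the `-5bit-1` suffix so the page reflects the q5_1 quant. A stripped-down, runnable sketch of that Blocks scaffolding follows; the `css` string and the chat widgets from app.py are stubbed out here.

```python
# Stripped-down sketch of the Blocks scaffolding touched by the last two
# hunks; css here is a placeholder, not the css string from app.py.
import gradio as gr

css = ".gradio-container {max-width: 850px; margin: auto;}"  # placeholder

with gr.Blocks(
    title="mpt-30b-chat-ggml-5bit-1",      # browser-tab title
    theme=gr.themes.Soft(text_size="sm"),  # compact text across components
    css=css,
) as block:
    gr.Markdown(
        """<h4><center>mpt-30b-chat-ggml-5bit-1</center></h4>

This demo is of [TheBloke/mpt-30B-chat-GGML](https://huggingface.co/TheBloke/mpt-30B-chat-GGML).
"""
    )

block.launch()
```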