Update app.py
app.py CHANGED
```diff
@@ -251,8 +251,12 @@ repo_id = "TheBloke/mpt-30B-chat-GGML"
 _ = """
 mpt-30b-chat.ggmlv0.q4_0.bin q4_0 4 16.85 GB 19.35 GB 4-bit.
 mpt-30b-chat.ggmlv0.q4_1.bin q4_1 4 18.73 GB 21.23 GB 4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
+mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
+mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
+mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
 """
 model_filename = "mpt-30b-chat.ggmlv0.q4_1.bin"
+model_filename = "mpt-30b-chat.ggmlv0.q5_1.bin"
 destination_folder = "models"
 
 download_mpt_quant(destination_folder, repo_id, model_filename)
```
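This hunk extends the inline size table with the q5_0, q5_1 and q8_0 quants and switches `model_filename` from the q4_1 file to the q5_1 file (the old assignment is left in place and simply overridden by the new line). The download itself goes through `download_mpt_quant`, whose body is not part of this diff; below is a minimal sketch of what such a helper could look like, assuming it wraps `huggingface_hub.hf_hub_download` — the real implementation in app.py may differ.

```python
# Hypothetical sketch of download_mpt_quant, assuming huggingface_hub;
# the actual helper in app.py is not shown in this diff.
from pathlib import Path

from huggingface_hub import hf_hub_download


def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str) -> Path:
    """Fetch one GGML quant file from the Hub into destination_folder."""
    Path(destination_folder).mkdir(parents=True, exist_ok=True)
    local_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_filename,
        local_dir=destination_folder,
        local_dir_use_symlinks=False,  # materialize a real file, not a cache symlink
    )
    return Path(local_path)
```

Per the table above, the q5_1 file is about 22.47 GB on disk and needs roughly 24.97 GB of RAM once loaded, so on a fresh Space the download alone dominates startup time.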
```diff
@@ -261,7 +265,7 @@ logger.info("done dl")
 
 config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
 llm = AutoModelForCausalLM.from_pretrained(
-    os.path.abspath("models/
+    os.path.abspath(f"models/{model_name}"),
     model_type="mpt",
     config=config,
 )
```
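The loader now interpolates the filename instead of hard-coding the path. Note that the f-string references `model_name` while the selection above binds `model_filename`; presumably app.py defines `model_name` elsewhere. Here is a self-contained sketch of this loading step, assuming the `ctransformers` package (whose `AutoConfig`/`AutoModelForCausalLM` are what accept GGML files and `model_type` like this); the `model_name` binding and the final call are illustrative only.

```python
# Minimal sketch of the loading step above, assuming ctransformers;
# model_name is bound here for illustration.
import os

from ctransformers import AutoConfig, AutoModelForCausalLM

model_name = "mpt-30b-chat.ggmlv0.q5_1.bin"  # the quant chosen in the first hunk

# context_length=8192 raises the usable context window for the chat loop.
config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
llm = AutoModelForCausalLM.from_pretrained(
    os.path.abspath(f"models/{model_name}"),  # absolute path to the local quant file
    model_type="mpt",  # required for local GGML files, which carry no architecture tag
    config=config,
)

print(llm("User: say hi in five words.\nAssistant:", max_new_tokens=32))
```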
```diff
@@ -299,7 +303,7 @@ css = """
 """
 
 with gr.Blocks(
-    title="mpt-30b-chat-ggml",
+    title="mpt-30b-chat-ggml-5bit-1",
     theme=gr.themes.Soft(text_size="sm"),
     css=css,
 ) as block:
```
```diff
@@ -308,7 +312,7 @@ with gr.Blocks(
         """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
     )
     gr.Markdown(
-        """<h4><center>mpt-30b-chat-ggml</center></h4>
+        """<h4><center>mpt-30b-chat-ggml-5bit-1</center></h4>
 
 This demo is of [TheBloke/mpt-30B-chat-GGML](https://huggingface.co/TheBloke/mpt-30B-chat-GGML).
 
```
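The last two hunks only rename the UI: the browser-tab `title` passed to `gr.Blocks` and the `<h4>` heading in the first `gr.Markdown` both pick up the `-5bit-1` suffix so the page reflects the q5_1 quant. A stripped-down, runnable sketch of that Blocks scaffolding follows; the `css` string and the chat widgets from app.py are stubbed out here.

```python
# Stripped-down sketch of the Blocks scaffolding touched by the last two
# hunks; css here is a placeholder, not the css string from app.py.
import gradio as gr

css = ".gradio-container {max-width: 850px; margin: auto;}"  # placeholder

with gr.Blocks(
    title="mpt-30b-chat-ggml-5bit-1",      # browser-tab title
    theme=gr.themes.Soft(text_size="sm"),  # compact text across components
    css=css,
) as block:
    gr.Markdown(
        """<h4><center>mpt-30b-chat-ggml-5bit-1</center></h4>

This demo is of [TheBloke/mpt-30B-chat-GGML](https://huggingface.co/TheBloke/mpt-30B-chat-GGML).
"""
    )

block.launch()
```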