Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
89b26fd
1
Parent(s):
bd9d50e
Update remove mpt-20 reference
Browse files
app.py
CHANGED
@@ -148,7 +148,7 @@ class GenerationConfig:
|
|
148 |
|
149 |
|
150 |
def format_prompt(system_prompt: str, user_prompt: str):
|
151 |
-
"""Format prompt based on: https://huggingface.co/spaces/mosaicml/
|
152 |
# TODO im_start/im_end possible fix for WizardCoder
|
153 |
|
154 |
system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
|
@@ -176,22 +176,22 @@ def generate(
|
|
176 |
|
177 |
|
178 |
logger.info("start dl")
|
179 |
-
_ = """full url: https://huggingface.co/TheBloke/
|
180 |
|
181 |
-
# https://huggingface.co/TheBloke/
|
182 |
_ = """
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
"""
|
189 |
MODEL_FILENAME = "mpt-30b-chat.ggmlv0.q4_1.bin"
|
190 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin" # 10.7G
|
191 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
|
192 |
DESTINATION_FOLDER = "models"
|
193 |
|
194 |
-
REPO_ID = "TheBloke/
|
195 |
if "WizardCoder" in MODEL_FILENAME:
|
196 |
REPO_ID = "TheBloke/WizardCoder-15B-1.0-GGML"
|
197 |
|
@@ -200,7 +200,7 @@ download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
|
|
200 |
logger.info("done dl")
|
201 |
|
202 |
# if "mpt" in model_filename:
|
203 |
-
# config = AutoConfig.from_pretrained("mosaicml/
|
204 |
# llm = AutoModelForCausalLM.from_pretrained(
|
205 |
# os.path.abspath(f"models/{model_filename}"),
|
206 |
# model_type="mpt",
|
@@ -264,14 +264,14 @@ css = """
|
|
264 |
"""
|
265 |
|
266 |
with gr.Blocks(
|
267 |
-
# title="
|
268 |
title=f"{MODEL_FILENAME}",
|
269 |
theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
|
270 |
css=css,
|
271 |
) as block:
|
272 |
with gr.Accordion("π Info", open=False):
|
273 |
# gr.HTML(
|
274 |
-
# """<center><a href="https://huggingface.co/spaces/mikeee/
|
275 |
# )
|
276 |
gr.Markdown(
|
277 |
f"""<h4><center>{MODEL_FILENAME}</center></h4>
|
|
|
148 |
|
149 |
|
150 |
def format_prompt(system_prompt: str, user_prompt: str):
|
151 |
+
"""Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
|
152 |
# TODO im_start/im_end possible fix for WizardCoder
|
153 |
|
154 |
system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
|
|
|
176 |
|
177 |
|
178 |
logger.info("start dl")
|
179 |
+
_ = """full url: https://huggingface.co/TheBloke/mpt-30B-chat-GGML/blob/main/mpt-30b-chat.ggmlv0.q4_1.bin"""
|
180 |
|
181 |
+
# https://huggingface.co/TheBloke/mpt-30B-chat-GGML
|
182 |
_ = """
|
183 |
+
mpt-30b-chat.ggmlv0.q4_0.bin q4_0 4 16.85 GB 19.35 GB 4-bit.
|
184 |
+
mpt-30b-chat.ggmlv0.q4_1.bin q4_1 4 18.73 GB 21.23 GB 4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
|
185 |
+
mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
|
186 |
+
mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
|
187 |
+
mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
|
188 |
"""
|
189 |
MODEL_FILENAME = "mpt-30b-chat.ggmlv0.q4_1.bin"
|
190 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin" # 10.7G
|
191 |
MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin" # 11.9G
|
192 |
DESTINATION_FOLDER = "models"
|
193 |
|
194 |
+
REPO_ID = "TheBloke/mpt-30B-chat-GGML"
|
195 |
if "WizardCoder" in MODEL_FILENAME:
|
196 |
REPO_ID = "TheBloke/WizardCoder-15B-1.0-GGML"
|
197 |
|
|
|
200 |
logger.info("done dl")
|
201 |
|
202 |
# if "mpt" in model_filename:
|
203 |
+
# config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
|
204 |
# llm = AutoModelForCausalLM.from_pretrained(
|
205 |
# os.path.abspath(f"models/{model_filename}"),
|
206 |
# model_type="mpt",
|
|
|
264 |
"""
|
265 |
|
266 |
with gr.Blocks(
|
267 |
+
# title="mpt-30b-chat-ggml",
|
268 |
title=f"{MODEL_FILENAME}",
|
269 |
theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
|
270 |
css=css,
|
271 |
) as block:
|
272 |
with gr.Accordion("π Info", open=False):
|
273 |
# gr.HTML(
|
274 |
+
# """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
|
275 |
# )
|
276 |
gr.Markdown(
|
277 |
f"""<h4><center>{MODEL_FILENAME}</center></h4>
|