Spaces:

mikeee
/

WizardCoder-15B-1.0-GGML

Runtime error

App Files Files Community

ffreemt committed on Jul 5, 2023

Commit

8708b41

1 Parent(s): e7193a9

Update

Browse files

Files changed (1) hide show

app.py +19 -10

app.py CHANGED Viewed

@@ -50,6 +50,9 @@ def predict(prompt, bot):
                 system_prompt=default_system_prompt,
                 user_prompt=prompt.strip(),
             )
             print(assistant_prefix, end=" ", flush=True)
             response = ""
@@ -91,10 +94,12 @@ def predict_api(prompt):
             max_new_tokens=512,  # adjust as needed
             seed=42,
             reset=False,  # reset history (cache)
-            stream=False,  # streaming per word/token
             threads=os.cpu_count() // 2,  # type: ignore  # adjust for your CPU
             stop=["<|im_end|>", "|<"],
         )
         generator = generate(
             LLM, _, system_prompt=default_system_prompt, user_prompt=prompt.strip()
         )
@@ -144,7 +149,7 @@ class GenerationConfig:
 def format_prompt(system_prompt: str, user_prompt: str):
     """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
-    # May need to be modified for WizardCoder: TODO
     system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
     user_prompt = f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
@@ -229,6 +234,9 @@ if "WizardCoder" in MODEL_FILENAME:
     # threads=os.cpu_count() // 2  # type: ignore
 # )
 GENERATION_CONFIG = GenerationConfig(
     temperature=0.2,
     top_k=0,
@@ -238,8 +246,8 @@ GENERATION_CONFIG = GenerationConfig(
     seed=42,
     reset=False,  # reset history (cache)
     stream=True,  # streaming per word/token
-    threads=os.cpu_count() // 2,  # type: ignore  # adjust for your CPU
-    stop=["<|im_end|>", "|<"],
 )
 css = """
@@ -273,13 +281,14 @@ with gr.Blocks(
             Try to refresh the browser and try again when occasionally errors occur.
-            It takes about >100 seconds to get a response. Restarting the space takes about 5 minutes if the space is asleep due to inactivity. If the space crashes for some reason, it will also take about 5 minutes to restart. You need to refresh the browser to reload the new space.
             """,
             elem_classes="xsmall",
         )
-    chatbot = gr.Chatbot(scroll_to_output=True).style(height=700)  # 500
-    buff = gr.Textbox(show_label=False)
     with gr.Row():
         with gr.Column(scale=4):
             msg = gr.Textbox(
@@ -287,7 +296,7 @@ with gr.Blocks(
                 placeholder="Ask me anything (press Enter or click Submit to send)",
                 show_label=False,
             ).style(container=False)
-        with gr.Column(scale=1):
             with gr.Row():
                 submit = gr.Button("Submit", elem_classes="xsmall")
                 stop = gr.Button("Stop", visible=False)
@@ -306,7 +315,7 @@ with gr.Blocks(
                         change = gr.Button("Change System Prompt")
                         reset = gr.Button("Reset System Prompt")
-    with gr.Accordion("Example inputs", open=True):
         etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
         examples = gr.Examples(
             examples=[
@@ -358,7 +367,7 @@ with gr.Blocks(
         fn=predict,
         inputs=[msg, chatbot],
         outputs=[msg, chatbot],
-        queue=True,
         show_progress="full",
         api_name="predict",
     )

                 system_prompt=default_system_prompt,
                 user_prompt=prompt.strip(),
             )
+            ns.generator = generator  # for .then
             print(assistant_prefix, end=" ", flush=True)
             response = ""
             max_new_tokens=512,  # adjust as needed
             seed=42,
             reset=False,  # reset history (cache)
+            stream=True,  # TODO stream=False and generator
             threads=os.cpu_count() // 2,  # type: ignore  # adjust for your CPU
             stop=["<|im_end|>", "|<"],
         )
+        # TODO stream does not make sense in api?
         generator = generate(
             LLM, _, system_prompt=default_system_prompt, user_prompt=prompt.strip()
         )
 def format_prompt(system_prompt: str, user_prompt: str):
     """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
+    # TODO im_start/im_end possible fix for WizardCoder
     system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
     user_prompt = f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
     # threads=os.cpu_count() // 2  # type: ignore
 # )
+cpu_count = os.cpu_count() // 2  # type: ignore
+logger.debug(f"{cpu_count=}")
 GENERATION_CONFIG = GenerationConfig(
     temperature=0.2,
     top_k=0,
     seed=42,
     reset=False,  # reset history (cache)
     stream=True,  # streaming per word/token
+    threads=cpu_count,
+    stop=["<|im_end|>", "|<"],  # TODO possible fix of stop
 )
 css = """
             Try to refresh the browser and try again when occasionally errors occur.
+            It takes about >100 seconds to get a response. Restarting the space takes about 2 minutes if the space is asleep due to inactivity. If the space crashes for some reason, it will also take about 2 minutes to restart. You need to refresh the browser to reload the new space.
             """,
             elem_classes="xsmall",
         )
+    # chatbot = gr.Chatbot().style(height=700)  # 500
+    chatbot = gr.Chatbot(height=700)  # 500
+    buff = gr.Textbox(show_label=False, visible=False)
     with gr.Row():
         with gr.Column(scale=4):
             msg = gr.Textbox(
                 placeholder="Ask me anything (press Enter or click Submit to send)",
                 show_label=False,
             ).style(container=False)
+        with gr.Column(scale=1, min_width=100):
             with gr.Row():
                 submit = gr.Button("Submit", elem_classes="xsmall")
                 stop = gr.Button("Stop", visible=False)
                         change = gr.Button("Change System Prompt")
                         reset = gr.Button("Reset System Prompt")
+    with gr.Accordion("Example Inputs", open=True):
         etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
         examples = gr.Examples(
             examples=[
         fn=predict,
         inputs=[msg, chatbot],
         outputs=[msg, chatbot],
+        # queue=True,
         show_progress="full",
         api_name="predict",
     )