WellnessWhiz

Sleeping

App Files Files Community

Johnyquest7 committed on Jul 10, 2024

Commit

d989999

verified ·

1 Parent(s): 478b5dd

Update app.py

Browse files

changed to previous chat temp

Files changed (1) hide show

app.py +79 -63

app.py CHANGED Viewed

@@ -48,6 +48,33 @@ h1 {
 }
 """
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
 model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto")  # to("cuda:0")
@@ -56,91 +83,80 @@ terminators = [
     tokenizer.convert_tokens_to_ids("<|eot_id|>")
 ]
-@spaces.GPU(duration=120)
-def chat_llama3_8b(message: str,
-              history: list,
-              temperature: float,
-              max_new_tokens: int
-             ) -> str:
-    """
-    Generate a streaming response using the llama3-8b model.
-    Args:
-        message (str): The input message.
-        history (list): The conversation history used by ChatInterface.
-        temperature (float): The temperature for generating the response.
-        max_new_tokens (int): The maximum number of new tokens to generate.
-    Returns:
-        str: The generated response.
-    """
-    conversation = []
-    for user, assistant in history:
-        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
-    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
-        input_ids= input_ids,
         streamer=streamer,
         max_new_tokens=max_new_tokens,
-        do_sample=True,
         temperature=temperature,
-        eos_token_id=terminators,
     )
-    # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
     if temperature == 0:
-        generate_kwargs['do_sample'] = False
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
-    outputs = []
-    for text in streamer:
-        outputs.append(text)
-        #print(outputs)
-        yield "".join(outputs)
-# Gradio block
-chatbot=gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
-with gr.Blocks(fill_height=True, css=css) as demo:
-    gr.Markdown(DESCRIPTION)
-    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
     gr.ChatInterface(
-        fn=chat_llama3_8b,
         chatbot=chatbot,
         fill_height=True,
         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
         additional_inputs=[
-            gr.Slider(minimum=0,
-                      maximum=1,
-                      step=0.1,
-                      value=0.95,
-                      label="Temperature",
-                      render=False),
-            gr.Slider(minimum=128,
-                      maximum=4096,
-                      step=1,
-                      value=512,
-                      label="Max new tokens",
-                      render=False ),
-            ],
         examples=[
-            ['How to setup a human base on Mars? Give short answer.'],
-            ['Explain theory of relativity to me like I’m 8 years old.'],
-            ['What is 9,000 * 9,000?'],
-            ['Write a pun-filled happy birthday message to my friend Alex.'],
-            ['Justify why a penguin might make a good king of the jungle.']
-            ],
         cache_examples=False,
-                     )
-    gr.Markdown(LICENSE)
 if __name__ == "__main__":
     demo.launch()

 }
 """
+DEFAULT_SYSTEM = '''You are a expert endocrinologist and you are here to assist users with diabetes management, weight loss, and nutritional guidance. Your primary goal is to provide accurate, helpful information while maintaining an encouraging and supportive tone.'''
+TOOL_EXAMPLE = '''You have access to the following tools:
+```python
+def generate_password(length: int, include_symbols: Optional[bool]):
+    """
+    Generate a random password.
+    Args:
+        length (int): The length of the password
+        include_symbols (Optional[bool]): Include symbols in the password
+    """
+    pass
+```
+Write "Action:" followed by a list of actions in JSON that you want to call, e.g.
+Action:
+```json
+[
+    {
+        "name": "tool name (one of [generate_password])",
+        "arguments": "the input to the tool"
+    }
+]
+```
+'''
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
 model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto")  # to("cuda:0")
     tokenizer.convert_tokens_to_ids("<|eot_id|>")
 ]
+@spaces.GPU
+def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
+    conversation = [{"role": "system", "content": system or DEFAULT_SYSTEM}]
+    for prompt, answer in history:
+        conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
     conversation.append({"role": "user", "content": message})
+    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(
+        model.device
+    )
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
+        input_ids=input_ids,
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         temperature=temperature,
+        do_sample=True,
     )
     if temperature == 0:
+        generate_kwargs["do_sample"] = False
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
+    output = ""
+    for new_token in streamer:
+        output += new_token
+        yield output
+chatbot = gr.Chatbot(height=450)
+with gr.Blocks(css=CSS) as demo:
+    gr.HTML(TITLE)
+    gr.HTML(DESCRIPTION)
+    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
     gr.ChatInterface(
+        fn=stream_chat,
         chatbot=chatbot,
         fill_height=True,
         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
         additional_inputs=[
+            gr.Text(
+                value="",
+                label="System",
+                render=False,
+            ),
+            gr.Slider(
+                minimum=0,
+                maximum=1,
+                step=0.1,
+                value=0.8,
+                label="Temperature",
+                render=False,
+            ),
+            gr.Slider(
+                minimum=128,
+                maximum=4096,
+                step=1,
+                value=1024,
+                label="Max new tokens",
+                render=False,
+            ),
+        ],
         examples=[
+            ["How do I lose weight?"],
+        ],
         cache_examples=False,
+    )
 if __name__ == "__main__":
     demo.launch()