Spaces:
Sleeping
Sleeping
Update app.py
Browse files · changed to previous chat temp
app.py
CHANGED
@@ -48,6 +48,33 @@ h1 {
|
|
48 |
}
|
49 |
"""
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
# Load the tokenizer and model
|
52 |
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
|
53 |
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto") # to("cuda:0")
|
@@ -56,91 +83,80 @@ terminators = [
|
|
56 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
57 |
]
|
58 |
|
59 |
-
@spaces.GPU
|
60 |
-
def
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
"""
|
66 |
-
Generate a streaming response using the llama3-8b model.
|
67 |
-
Args:
|
68 |
-
message (str): The input message.
|
69 |
-
history (list): The conversation history used by ChatInterface.
|
70 |
-
temperature (float): The temperature for generating the response.
|
71 |
-
max_new_tokens (int): The maximum number of new tokens to generate.
|
72 |
-
Returns:
|
73 |
-
str: The generated response.
|
74 |
-
"""
|
75 |
-
conversation = []
|
76 |
-
for user, assistant in history:
|
77 |
-
conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
|
78 |
conversation.append({"role": "user", "content": message})
|
79 |
|
80 |
-
input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(
|
81 |
-
|
|
|
82 |
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
83 |
|
84 |
generate_kwargs = dict(
|
85 |
-
input_ids=
|
86 |
streamer=streamer,
|
87 |
max_new_tokens=max_new_tokens,
|
88 |
-
do_sample=True,
|
89 |
temperature=temperature,
|
90 |
-
|
91 |
)
|
92 |
-
# This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
|
93 |
if temperature == 0:
|
94 |
-
generate_kwargs[
|
95 |
-
|
96 |
t = Thread(target=model.generate, kwargs=generate_kwargs)
|
97 |
t.start()
|
98 |
|
99 |
-
|
100 |
-
for
|
101 |
-
|
102 |
-
|
103 |
-
yield "".join(outputs)
|
104 |
-
|
105 |
|
106 |
-
# Gradio block
|
107 |
-
chatbot=gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
|
108 |
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
gr.
|
|
|
|
|
113 |
gr.ChatInterface(
|
114 |
-
fn=
|
115 |
chatbot=chatbot,
|
116 |
fill_height=True,
|
117 |
additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
|
118 |
additional_inputs=[
|
119 |
-
gr.
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
examples=[
|
133 |
-
|
134 |
-
[
|
135 |
-
|
136 |
-
['Write a pun-filled happy birthday message to my friend Alex.'],
|
137 |
-
['Justify why a penguin might make a good king of the jungle.']
|
138 |
-
],
|
139 |
cache_examples=False,
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
if __name__ == "__main__":
|
145 |
demo.launch()
|
146 |
|
|
|
48 |
}
|
49 |
"""
|
50 |
|
51 |
+
# Fallback system prompt, used when the caller supplies an empty system message.
DEFAULT_SYSTEM = '''You are an expert endocrinologist and you are here to assist users with diabetes management, weight loss, and nutritional guidance. Your primary goal is to provide accurate, helpful information while maintaining an encouraging and supportive tone.'''
|
52 |
+
|
53 |
+
TOOL_EXAMPLE = '''You have access to the following tools:
|
54 |
+
```python
|
55 |
+
def generate_password(length: int, include_symbols: Optional[bool]):
|
56 |
+
"""
|
57 |
+
Generate a random password.
|
58 |
+
Args:
|
59 |
+
length (int): The length of the password
|
60 |
+
include_symbols (Optional[bool]): Include symbols in the password
|
61 |
+
"""
|
62 |
+
pass
|
63 |
+
```
|
64 |
+
Write "Action:" followed by a list of actions in JSON that you want to call, e.g.
|
65 |
+
Action:
|
66 |
+
```json
|
67 |
+
[
|
68 |
+
{
|
69 |
+
"name": "tool name (one of [generate_password])",
|
70 |
+
"arguments": "the input to the tool"
|
71 |
+
}
|
72 |
+
]
|
73 |
+
```
|
74 |
+
'''
|
75 |
+
|
76 |
+
|
77 |
+
|
78 |
# Load the tokenizer and model
|
79 |
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
|
80 |
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto") # to("cuda:0")
|
|
|
83 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
84 |
]
|
85 |
|
86 |
+
@spaces.GPU
def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
    """Stream a chat reply for ``message``, yielding the text generated so far.

    Args:
        message: The latest user message.
        history: Earlier turns, iterated as ``(prompt, answer)`` pairs.
        system: System prompt; ``DEFAULT_SYSTEM`` is used when this is empty.
        temperature: Sampling temperature. ``0`` switches to greedy decoding
            (``do_sample=False``) instead of sampling.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Yields:
        str: The accumulated response text after each streamed chunk.
    """
    # Rebuild the full conversation in chat-template form: system prompt,
    # then alternating user/assistant turns, then the new user message.
    conversation = [{"role": "system", "content": system or DEFAULT_SYSTEM}]
    for prompt, answer in history:
        conversation.extend(
            [
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": answer},
            ]
        )
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        # Stop on either the regular EOS token or Llama-3's end-of-turn token;
        # `terminators` is the module-level list built next to the model load.
        eos_token_id=terminators,
    )
    if temperature == 0:
        # Sampling with temperature 0 is invalid, so fall back to greedy
        # decoding and do not forward the (unused) temperature at all.
        generate_kwargs["do_sample"] = False
    else:
        generate_kwargs["do_sample"] = True
        generate_kwargs["temperature"] = temperature

    # Generation runs in a worker thread; this generator drains the streamer.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    output = ""
    for new_token in streamer:
        output += new_token
        yield output
|
|
|
|
|
116 |
|
|
|
|
|
117 |
|
118 |
+
# Chat display widget handed to the ChatInterface below.
chatbot = gr.Chatbot(height=450)

with gr.Blocks(css=CSS) as demo:
    # Page header: title, description and a "duplicate this Space" button.
    gr.HTML(TITLE)
    gr.HTML(DESCRIPTION)
    gr.DuplicateButton(
        value="Duplicate Space for private use",
        elem_classes="duplicate-button",
    )

    # Extra controls are built with render=False so that ChatInterface
    # places them itself (inside the collapsible parameters accordion).
    parameters_accordion = gr.Accordion(label="⚙️ Parameters", open=False, render=False)
    system_box = gr.Text(value="", label="System", render=False)
    temperature_slider = gr.Slider(
        minimum=0, maximum=1, step=0.1, value=0.8, label="Temperature", render=False
    )
    max_tokens_slider = gr.Slider(
        minimum=128, maximum=4096, step=1, value=1024, label="Max new tokens", render=False
    )

    # The order of additional_inputs matches stream_chat's extra parameters:
    # system, temperature, max_new_tokens.
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=parameters_accordion,
        additional_inputs=[system_box, temperature_slider, max_tokens_slider],
        examples=[["How do I lose weight?"]],
        cache_examples=False,
    )
|
158 |
+
|
159 |
+
|
|
|
160 |
if __name__ == "__main__":
|
161 |
demo.launch()
|
162 |
|