Johnyquest7 committed on
Commit
d989999
·
verified ·
1 Parent(s): 478b5dd

Update app.py

Browse files

changed to previous chat temp

Files changed (1) hide show
  1. app.py +79 -63
app.py CHANGED
@@ -48,6 +48,33 @@ h1 {
48
  }
49
  """
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  # Load the tokenizer and model
52
  tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
53
  model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto") # to("cuda:0")
@@ -56,91 +83,80 @@ terminators = [
56
  tokenizer.convert_tokens_to_ids("<|eot_id|>")
57
  ]
58
 
59
- @spaces.GPU(duration=120)
60
- def chat_llama3_8b(message: str,
61
- history: list,
62
- temperature: float,
63
- max_new_tokens: int
64
- ) -> str:
65
- """
66
- Generate a streaming response using the llama3-8b model.
67
- Args:
68
- message (str): The input message.
69
- history (list): The conversation history used by ChatInterface.
70
- temperature (float): The temperature for generating the response.
71
- max_new_tokens (int): The maximum number of new tokens to generate.
72
- Returns:
73
- str: The generated response.
74
- """
75
- conversation = []
76
- for user, assistant in history:
77
- conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
78
  conversation.append({"role": "user", "content": message})
79
 
80
- input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
81
-
 
82
  streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
83
 
84
  generate_kwargs = dict(
85
- input_ids= input_ids,
86
  streamer=streamer,
87
  max_new_tokens=max_new_tokens,
88
- do_sample=True,
89
  temperature=temperature,
90
- eos_token_id=terminators,
91
  )
92
- # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
93
  if temperature == 0:
94
- generate_kwargs['do_sample'] = False
95
-
96
  t = Thread(target=model.generate, kwargs=generate_kwargs)
97
  t.start()
98
 
99
- outputs = []
100
- for text in streamer:
101
- outputs.append(text)
102
- #print(outputs)
103
- yield "".join(outputs)
104
-
105
 
106
- # Gradio block
107
- chatbot=gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
108
 
109
- with gr.Blocks(fill_height=True, css=css) as demo:
110
-
111
- gr.Markdown(DESCRIPTION)
112
- gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
 
 
113
  gr.ChatInterface(
114
- fn=chat_llama3_8b,
115
  chatbot=chatbot,
116
  fill_height=True,
117
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
118
  additional_inputs=[
119
- gr.Slider(minimum=0,
120
- maximum=1,
121
- step=0.1,
122
- value=0.95,
123
- label="Temperature",
124
- render=False),
125
- gr.Slider(minimum=128,
126
- maximum=4096,
127
- step=1,
128
- value=512,
129
- label="Max new tokens",
130
- render=False ),
131
- ],
 
 
 
 
 
 
 
 
 
132
  examples=[
133
- ['How to setup a human base on Mars? Give short answer.'],
134
- ['Explain theory of relativity to me like I’m 8 years old.'],
135
- ['What is 9,000 * 9,000?'],
136
- ['Write a pun-filled happy birthday message to my friend Alex.'],
137
- ['Justify why a penguin might make a good king of the jungle.']
138
- ],
139
  cache_examples=False,
140
- )
141
-
142
- gr.Markdown(LICENSE)
143
-
144
  if __name__ == "__main__":
145
  demo.launch()
146
 
 
48
  }
49
  """
50
 
51
# Default system prompt used when the user leaves the "System" textbox empty
# (see stream_chat's `system or DEFAULT_SYSTEM` fallback).
# FIX: "a expert" -> "an expert" (grammar error in the user-facing prompt).
DEFAULT_SYSTEM = '''You are an expert endocrinologist and you are here to assist users with diabetes management, weight loss, and nutritional guidance. Your primary goal is to provide accurate, helpful information while maintaining an encouraging and supportive tone.'''

# Example prompt demonstrating the expected tool-calling output format.
# NOTE(review): this constant is not referenced anywhere in the visible code —
# confirm it is still needed before removing.
TOOL_EXAMPLE = '''You have access to the following tools:
```python
def generate_password(length: int, include_symbols: Optional[bool]):
    """
    Generate a random password.
    Args:
        length (int): The length of the password
        include_symbols (Optional[bool]): Include symbols in the password
    """
    pass
```
Write "Action:" followed by a list of actions in JSON that you want to call, e.g.
Action:
```json
[
    {
        "name": "tool name (one of [generate_password])",
        "arguments": "the input to the tool"
    }
]
```
'''
75
+
76
+
77
+
78
# Load the tokenizer and causal LM for Meta-Llama-3-8B-Instruct once at import time.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# device_map="auto" lets accelerate place the weights on available devices
# (replaces an explicit .to("cuda:0")).
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")
 
83
  tokenizer.convert_tokens_to_ids("<|eot_id|>")
84
  ]
85
 
86
@spaces.GPU
def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
    """Stream a chat completion from the Llama-3-8B-Instruct model.

    Args:
        message: The latest user message.
        history: List of (user, assistant) turn pairs from gr.ChatInterface.
        system: System prompt; falls back to DEFAULT_SYSTEM when empty.
        temperature: Sampling temperature; 0 switches to greedy decoding.
        max_new_tokens: Upper bound on the number of generated tokens.

    Yields:
        The accumulated response text after each newly streamed token.
    """
    conversation = [{"role": "system", "content": system or DEFAULT_SYSTEM}]
    for prompt, answer in history:
        conversation.extend(
            [{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}]
        )
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
        # BUGFIX: restore the module-level `terminators` (contains <|eot_id|>) that
        # this revision dropped; without it Llama-3 does not stop at end-of-turn.
        eos_token_id=terminators,
    )
    # temperature == 0 must force greedy decoding; sampling with temperature 0 crashes.
    if temperature == 0:
        generate_kwargs["do_sample"] = False

    # Run generation in a background thread; the streamer yields tokens as they arrive.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    output = ""
    for new_token in streamer:
        output += new_token
        yield output
 
 
116
 
 
 
117
 
118
# Chat transcript display shared with the ChatInterface below.
chatbot = gr.Chatbot(height=450)

with gr.Blocks(css=CSS) as demo:
    gr.HTML(TITLE)
    gr.HTML(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")

    # Extra controls are created with render=False so ChatInterface can place
    # them inside the parameters accordion instead of the Blocks layout.
    system_box = gr.Text(value="", label="System", render=False)
    temperature_slider = gr.Slider(
        minimum=0, maximum=1, step=0.1, value=0.8, label="Temperature", render=False
    )
    max_tokens_slider = gr.Slider(
        minimum=128, maximum=4096, step=1, value=1024, label="Max new tokens", render=False
    )

    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[system_box, temperature_slider, max_tokens_slider],
        examples=[["How do I lose weight?"]],
        cache_examples=False,
    )
158
+
159
+
 
160
# Script entry point: start the Gradio server when run directly.
if __name__ == "__main__":
    demo.launch()