Mimi committed
Commit 5ed7b6c · 1 Parent(s): 2f61f40

add new file

Files changed (4):
  1. .gitignore +1 -1
  2. agent.py +19 -20
  3. app.py +32 -26
  4. data_utils.py +256 -0
.gitignore CHANGED
@@ -131,4 +131,4 @@ dmypy.json
 
 .vscode/
 model_hugger.py
-*.key
+demo.ipynb
agent.py CHANGED
@@ -11,17 +11,16 @@ Return:
 
 
 import time
-from uuid import uuid4
+from data_utils import end_session, load_agent_from_hf, new_user
+
 from llama_cpp import Llama
 from llama_cpp.llama_tokenizer import LlamaHFTokenizer
 
 # default decoding params initiation
 SEED = 42
-
 MODEL_CARD = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
 MODEL_PATH = "Meta-Llama-3.1-8B-Instruct-Q3_K_XL.gguf"
 base_model_id = "meta-llama/Llama-3.1-8B-Instruct"
-
 new_chat_template = """{{- bos_token }}
 {%- if custom_tools is defined %}
 {%- set tools = custom_tools %}
@@ -108,46 +107,46 @@ new_chat_template = """{{- bos_token }}
 {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
 {%- endif %}"""
 
-datetime_format = '%Y-%m-%d %H:%M:%S'
-
-from datetime import datetime
-
 class Naomi:
     def __init__(self, **kwargs):
-        self.session_id = uuid4().hex
-        self.candidate = kwargs
-
+        # init dataclasses
+        self.user = new_user(**kwargs)
+        self.agent = load_agent_from_hf('Naomi')
        # load the model
        self.model = Llama.from_pretrained(
            repo_id=MODEL_CARD,
            filename=MODEL_PATH,
            tokenizer=LlamaHFTokenizer.from_pretrained(base_model_id)
        )
-
        self.model.tokenizer_.hf_tokenizer.chat_template = new_chat_template
+
        # load the agents prompts
+        sys_msg = self.agent.system_prompt(self.user)
        self.chat_history = self.model.tokenizer_.hf_tokenizer.apply_chat_template(
+            sys_msg,
+            tokenize=False
        )
 
-        self.timestamps = []
-
-    def invoke(self, history, **kwargs):
+    def respond(self, user_input: dict, **kwargs):
        """ Invoked during stream. """
        # user msg handling
-        self.timestamps += [datetime.now().strftime(datetime_format)]
-        format_user_input = self.model.tokenizer_.hf_tokenizer.apply_chat_template(history[-1], tokenize=False, add_generation_prompt=False)
+        format_user_input = self.model.tokenizer_.hf_tokenizer.apply_chat_template([user_input], tokenize=False, add_generation_prompt=False)
        self.chat_history += format_user_input
        # agent msg results + clean
        response = self.model(self.chat_history, **kwargs)
        output = "".join(response['choices'][0]['text'].split('\n\n')[1:])
        # update history
-        self.timestamps += [datetime.now().strftime(datetime_format)]
        self.chat_history += self.model.tokenizer_.hf_tokenizer.apply_chat_template([{'role': 'assistant', 'content': output}], tokenize=False, add_generation_prompt=False)
+
        return output
 
-    def respond(self, history, **kwargs):
+    @staticmethod
+    def gen(response):
        """ Generator that yields responses in chat sessions. """
-        response = self.invoke(history, **kwargs)
        for word in response.split():
            yield word + " "
-            time.sleep(0.05)
+            time.sleep(0.05)
+
+    def end(self, chat_messages):
+        self.chat = chat_messages
+        end_session(self)
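
Taken together, the agent.py changes move user and session bookkeeping out of the class and into data_utils: __init__ now builds the candidate with new_user and the agent profile with load_agent_from_hf, respond takes a single user message dict, gen streams a finished reply word by word, and end hands the transcript to end_session. A minimal sketch of the intended call flow, assuming an intake dict shaped like the one in data_utils' own test block (the contact address and decoding kwargs below are illustrative, not part of the commit):

    from agent import Naomi

    # Hypothetical intake values; field names follow the committed intake_form list.
    intake = {
        'name': 'mike',
        'contact_type': 1,              # index into Contact.__match_args__
        'contact': 'mike@example.com',  # placeholder address, not from the commit
        'dob': '29/12/1800',
        'location': 'north korea',
        'intake_submission': True,
    }

    naomi = Naomi(**intake)   # builds Candidate + Agent profile, loads the GGUF model
    reply = naomi.respond({'role': 'user', 'content': 'Hello!'}, max_tokens=128)

    # gen() is the word-by-word generator the Streamlit UI streams from.
    for chunk in Naomi.gen(reply):
        print(chunk, end='', flush=True)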
app.py CHANGED
@@ -11,32 +11,32 @@
 import time
 import streamlit as st
 from agent import Naomi
+from data_utils import ChatSession, Contact
+
+contact_options = Contact.__match_args__
+intake_form = [
+    'name',
+    'contact_type',
+    'contact',
+    'dob',
+    'location',
+    'intake_submission'
+]
 
 # Title of the app
 st.title("Chatbot Naomi")
 
 print('Initial Session state', st.session_state)
-contact_options = ['Instagram', 'Email', 'Number']
-intake_form = [
-    'candidate_name',
-    'candidate_contact_type',
-    'candidate_contact',
-    'candidate_dob',
-    'candidate_location',
-    'intake_submission'
-]
-if "messages" not in st.session_state:
-    st.session_state.messages = []
 
 @st.dialog('Intake form', width='large')
 def open_intake_form():
     st.markdown('Fill in your detaisl below to start chat session :)')
-    name = st.text_input("Enter your name", key='candidate_name')
+    name = st.text_input("Enter your name", key='name')
     contact_col_1, contact_col_2 = st.columns(spec=[0.3, 0.7], vertical_alignment='center')
-    contact_col_1.selectbox("Select contact option", contact_options, key='candidate_contact_type')
-    contact_col_2.text_input('Enter your username', key='candidate_contact')
-    dob = st.date_input("When is your birthday?", key='candidate_dob')
-    location = st.text_input('Enter your location', key='candidate_location')
+    contact_col_1.selectbox("Select contact option", contact_options, key='contact_type')
+    contact_col_2.text_input('Enter your username', key='contact')
+    dob = st.date_input("When is your birthday?", key='dob')
+    location = st.text_input('Enter your location', key='location')
     #button = st.button('Submit', use_container_width=True, type='primary')
     # after the button is clicked the page automatically reruns and the workflow starts from the beginning
 
@@ -49,7 +49,10 @@ def open_intake_form():
 def open_chat_window(**kwargs):
     # adds to current state (deletes if doesnt)
     st.session_state.update(kwargs)
-    naomi = Naomi(**kwargs)
+    st.session_state.naomi = Naomi(**kwargs)
+
+    if "messages" not in st.session_state:
+        st.session_state.messages = ChatSession()
 
     st.markdown('Welcome to the chat!')
     msgbox = st.container(height=400, border=False)
@@ -62,13 +65,12 @@ def open_chat_window(**kwargs):
         # Add user message to chat history
         print(f'State: {st.session_state}\nUser inserted message: {user_input}')
 
-        st.session_state.messages.append({"role": "user", "content": user_input})
+        st.session_state.messages.add_message(role='user', content=user_input)
         # Display user message in chat message container
         msgbox.chat_message("user").write(user_input)
-        response = msgbox.chat_message('assistant').write_stream(naomi.respond(st.session_state.messages))
-
-        # Append assistant's response to the messages history
-        st.session_state.messages.append({"role": "assistant", "content": response})
+        response = st.session_state.naomi.invoke(st.session_state.messages[-1])
+        msgbox.chat_message('assistant').write_stream(st.session_state.naomi.gen(response))
+        st.session_state.messages.add_message(role='assistant', content=response)
 
     undo_button, reset_button = st.columns(2)
     if undo_button.button('Undo message', use_container_width=True, type='secondary'):
@@ -82,10 +84,14 @@ def main():
     if st.button('Start chat session . . .', type='primary', key='open_intake'):
         open_intake_form()
     else:
-        st.session_state['candidate_name'] = st.session_state['candidate_name'].lower().capitalize()
-        open_chat_window(**st.session_state)
-
-    # render_agent_mood()
+        if 'end_chat' not in st.session_state:
+            st.session_state['name'] = st.session_state['name'].lower().capitalize()
+            open_chat_window(**st.session_state)
+        else:
+            if 'naomi' not in st.session_state:
+                st.rerun()
+            else:
+                st.session_state.naomi.end()
 
 if __name__ == '__main__':
     main()
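
One detail worth noting in the app.py changes: the contact selectbox options now come from Contact.__match_args__, the tuple of field names that the dataclass machinery generates, so the intake form stays in sync with whatever contact fields data_utils defines. A quick standalone sketch of that behaviour (field names as committed, base class omitted for brevity):

    from dataclasses import dataclass
    from typing import Optional

    # Dataclasses expose their positional field names via __match_args__;
    # app.py reuses that tuple as the selectbox options.
    @dataclass
    class Contact:
        instagram: Optional[str] = None
        email: Optional[str] = None
        mobile: Optional[str] = None

    print(Contact.__match_args__)  # ('instagram', 'email', 'mobile')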
data_utils.py ADDED
@@ -0,0 +1,256 @@
+
+from abc import ABC
+import pandas as pd
+from enum import Enum
+from uuid import uuid4
+from datetime import datetime
+from dataclasses import dataclass, field
+from langchain_core.prompts import PromptTemplate
+from datasets import load_dataset, DatasetDict, Dataset, concatenate_datasets
+
+from typing import List, Dict, Any, Literal, Optional
+
+username = 'mimipynb'
+
+class HFConfig(Enum):
+    chat = username + '/naomi-dialogue'
+    users = username + '/naomi-users'
+    results = username + '/naomi-eval'
+    hub = username + '/agentNet'
+    pepe = username + '/agentNetHuman'
+
+def load_agent_from_hf(agent_name):
+    """ Loads agent from HF """
+
+    botnet = load_dataset(HFConfig.hub.value, token=True, split='train').to_pandas()
+    chatbot = dict(zip(botnet.columns, *botnet[botnet['name'] == agent_name].values))
+    chatbot.pop('agent_type')
+
+    return Agent(**chatbot)
+
+def load_main_user():
+    """ Loads main user from HF. To be removed / changed. """
+    pepes = load_dataset(HFConfig.pepe.value, token=True, split='train').to_pandas()
+    pepe = dict(zip(pepes.columns, *pepes[pepes['user_type'] == 'main'].values))
+    pepe.pop('user_type')
+    pepe.pop('relation_type')
+    pepe.pop('repo_id')
+    pepe.pop('input_file_path')
+    pepe.pop('output_file_path')
+
+    return User(**pepe)
+
+def uploader(repo_id, new_data):
+    """ Appends new streaming sessions to HF space. """
+
+    original = load_dataset(repo_id=repo_id, token=True)
+    if isinstance(original, DatasetDict):
+        original = original['train']
+
+    concat = concatenate_datasets([original, new_data])
+
+    if len(concat) != len(original) + len(new_data):
+        raise ValueError(f"Expected concatenated data to be to be the sum of {len(original)} and {len(new_data)} but received {len(concat)} ")
+
+    concat.push_to_hub(
+        repo_id=repo_id,
+        private=True
+    )
+    print(f"Finished pushing to {repo_id}")
+
+def end_session(naomi):
+    """ Data Handlers to run end of chat session. """
+
+    chat = naomi.chat.messages
+    user = naomi.user
+    results = naomi.results
+    uploader(HFConfig.chat.value, Dataset.from_pandas(chat))
+    uploader(HFConfig.users.value, Dataset.from_dict(user))
+    uploader(HFConfig.results.value, Dataset.from_pandas(results))
+
+chat_messages = [
+    {'role': 'user', 'content': 'Hello!'},
+    {'role': 'assistant', 'content': 'Hi there! How can I assist you today?'},
+    {'role': 'user', 'content': 'I have a question about my order.'},
+    {'role': 'assistant', 'content': 'Sure! What would you like to know about your order?'},
+    {'role': 'user', 'content': 'When will it be delivered?'},
+    {'role': 'assistant', 'content': 'Let me check that for you. Can you provide me with your order number?'},
+    {'role': 'user', 'content': 'My order number is 123456.'},
+    {'role': 'assistant', 'content': 'Thank you! Your order is scheduled to be delivered on March 5th.'},
+    {'role': 'user', 'content': 'Great! Thank you for your help.'},
+    {'role': 'assistant', 'content': 'You’re welcome! If you have any more questions, feel free to ask.'},
+    {'role': 'user', 'content': 'Will do! Have a nice day.'},
+    {'role': 'assistant', 'content': 'You too! Take care!'}
+]
+
+@dataclass
+class ChatMessage:
+    role: str
+    content: str
+    timestamp: str = field(default=datetime.now().isoformat())
+    inference: Dict[str, Any] = field(default_factory=dict)
+
+    def preprocess(self):
+        # Example preprocessing: strip whitespace and convert to lowercase
+        self.content = self.content.strip().lower()
+
+    def collect_features(self):
+        """ TODO:
+        - connect to classifiers / pipeline
+        - connect to agentDial
+        """
+        self.inference['positive'] = 0.05
+        self.inference['negative'] = 0.05
+        self.inference['neutral'] = 0.90
+        self.inference['intent'] = 'greeting'
+        self.inference['mood'] = 'neutral'
+
+    def __post_init__(self):
+        """ Workflow of inferencing tools. """
+        self.preprocess()
+        self.collect_features()
+
+@dataclass
+class ChatSession:
+    _messages: List[ChatMessage] = field(default_factory=list)
+    session_id: str = field(default=uuid4().hex)
+
+    def __iter__(self):
+        # Iterates only the role and content for tokenizing.
+        for item in self._messages:
+            yield {
+                'role': item.role,
+                'content': item.content
+            }
+
+    def __getitem__(self, index):
+        """ Only returns the role and content for the requested index."""
+        if 0 <= index < len(self._messages):
+            msg = self._messages[index]
+            return {
+                'role': msg.role,
+                'content': msg.content
+            }
+        raise IndexError
+
+    @property
+    def messages(self):
+        """ Returns dataframe. Includes inferenced features. """
+        data = pd.DataFrame(self._messages)
+        data['session_id'] = self.session_id
+        return data
+
+    def add_message(self, role: Literal['user', 'role', 'system'], content: str):
+        """ Adds messages to the chat sessions. """
+        message = ChatMessage(role=role, content=content)
+        self._messages.append(message)
+
+@dataclass
+class ProfileBase(ABC):
+    def __post_init__(self):
+        """ Base checks """
+        if hasattr(self, 'name') and self.name:
+            self.name = self.name.lower().capitalize()
+        if hasattr(self, 'prompt'):
+            prompt = PromptTemplate.from_template(self.prompt)
+            self.prompt = prompt
+
+@dataclass
+class Agent(ProfileBase):
+    """Setup Agent Profile or Adds Agent to Bot Family"""
+
+    name: str
+    prompt: str
+    data: dict
+
+    def system_prompt(self, candidate):
+        try:
+            main_user = load_main_user()
+            prompt = self.prompt.invoke(
+                input=dict(
+                    user_name=main_user.name,
+                    user_likes="\n".join(main_user.likes),
+                    user_dislikes="\n".join(main_user.dislikes),
+                    candidate_details=candidate.format_profile(),
+                    **self.data
+                )
+            )
+            print(f"Parsed prompt: {prompt}. Full input: \n{prompt.text}")
+
+            return [{'role': 'system', 'content': prompt.text}]
+
+        except Exception as e:
+            print(e)
+            raise
+
+@dataclass
+class Contact(ProfileBase):
+    """User's Metaclasses -- Social"""
+
+    instagram: Optional[str] = None
+    email: Optional[str] = None
+    mobile: Optional[str] = None
+
+@dataclass
+class Biography:
+    """User's Metaclasses -- Biography / FAQs"""
+
+    dob: Optional[str] = None
+    location: Optional[str] = None
+    mbti_label: Optional[str] = None
+    education: Optional[str] = None
+    occupation: Optional[str] = None
+
+@dataclass
+class User(Biography, Contact):
+    """User's Datahandler for account creation. Metaclass: Contact"""
+
+    name: str = field(default_factory=str)
+    likes: List[str] = field(default_factory=list)
+    dislikes: List[str] = field(default_factory=list)
+
+@dataclass
+class Candidate(Contact, Biography):
+    """Interviewing Candidate Accessor for Agents roleplaying as Interviewers."""
+
+    name: str = field(default=str)
+    id: str = field(default=uuid4().hex)
+
+    def format_profile(self):
+        return "".join([f"{key}: {val}\n" for key, val in self.__dict__.items() if val is not None or key not in ('output_file_path', 'input_file_path', 'id')])
+
+def new_user(**kwargs):
+    """ Process inputs collected from frontend to backend. Returns Candidate. """
+
+    contact_type = kwargs.get('contact_type', None)
+    if contact_type is not None:
+        contact = Contact.__match_args__[contact_type] if isinstance(contact_type, int) else contact_type
+        kwargs.update({contact: kwargs.get('contact', None)})
+
+    kwargs.pop('contact_type')
+    kwargs.pop('contact')
+    kwargs.pop('intake_submission')
+
+    return Candidate(**kwargs)
+
+if __name__ == "__main__":
+    # Example usage for chat session
+    """
+    chat_session = ChatSession()
+    for msg in chat_messages:
+        chat_session.add_message(msg['role'], msg['content'])
+    print(chat_session.messages)
+    """
+    # user = load_main_user()
+    # print(user)
+
+    test_user = {
+        'name': 'mike',
+        'contact_type': 1,
+        'contact': '[email protected]',
+        'dob': '29/12/1800',
+        'location': 'north korea',
+        'intake_submission': True
+    }
+    candy = new_user(**test_user)
+    print(candy)
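
Beyond the __main__ block above, the ChatSession/ChatMessage pair is the piece the rest of the commit leans on: add_message wraps each turn in a ChatMessage (which runs preprocess and the stubbed collect_features on construction), iteration yields plain role/content dicts for the tokenizer, and the messages property flattens everything into a session-tagged DataFrame for uploader. A small local walk-through under those assumptions (no Hub calls, sample text made up):

    from data_utils import ChatSession

    session = ChatSession()
    session.add_message(role='user', content='Hello!')
    session.add_message(role='assistant', content='Hi there! How can I assist you today?')

    # Iteration yields only {'role', 'content'} dicts, ready for apply_chat_template.
    print(list(session))

    # .messages returns a DataFrame (role, content, timestamp, inference features)
    # tagged with session_id; end_session later pushes it to the naomi-dialogue repo.
    print(session.messages)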