from abc import ABC
import pandas as pd
from enum import Enum
from uuid import uuid4
from datetime import datetime
from dataclasses import dataclass, field
from langchain_core.prompts import PromptTemplate
from datasets import load_dataset, DatasetDict, Dataset, concatenate_datasets

from typing import List, Dict, Any, Literal, Optional

username = 'mimipynb'

class HFConfig(Enum):
    chat = username + '/naomi-dialogue'
    users = username + '/naomi-users'
    results = username + '/naomi-eval'
    hub = username + '/agentNet'
    pepe = username + '/agentNetHuman'

def load_agent_from_hf(agent_name):
    """ Loads agent from HF """

    botnet = load_dataset(HFConfig.hub.value, token=True, split='train').to_pandas()
    # Assumes exactly one row in the agentNet repo matches `agent_name`; zip the
    # column names against that single row's values to build constructor kwargs.
    chatbot = dict(zip(botnet.columns, *botnet[botnet['name'] == agent_name].values))
    chatbot.pop('agent_type')

    return Agent(**chatbot)

def load_main_user():
    """ Loads main user from HF. To be removed / changed. """
    pepes = load_dataset(HFConfig.pepe.value, token=True, split='train').to_pandas()
    pepe = dict(zip(pepes.columns, *pepes[pepes['user_type'] == 'main'].values))
    pepe.pop('user_type')
    pepe.pop('relation_type')
    pepe.pop('repo_id')
    pepe.pop('input_file_path')
    pepe.pop('output_file_path')

    return User(**pepe)

def uploader(repo_id, new_data):
    """ Appends new streaming sessions to HF space. """

    original = load_dataset(repo_id, token=True)
    if isinstance(original, DatasetDict):
        original = original['train']

    concat = concatenate_datasets([original, new_data])

    if len(concat) != len(original) + len(new_data):
        raise ValueError(f"Expected concatenated data to be to be the sum of {len(original)} and {len(new_data)} but received {len(concat)} ")

    concat.push_to_hub(
        repo_id=repo_id,
        private=True
    )
    print(f"Finished pushing to {repo_id}")

def end_session(naomi):
    """ Data Handlers to run end of chat session. """

    chat = naomi.chat.messages
    user = naomi.user
    results = naomi.results
    uploader(HFConfig.chat.value, Dataset.from_pandas(chat))
    uploader(HFConfig.users.value, Dataset.from_dict(user))
    uploader(HFConfig.results.value, Dataset.from_pandas(results))
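
# Example sketch (assumption): `end_session` expects an object exposing `chat`,
# `user`, and `results` attributes. The Naomi runtime that owns them is not
# defined in this module, so a minimal structural stand-in for local testing
# could look like (field names/values below are illustrative only):
#
#   from types import SimpleNamespace
#   naomi = SimpleNamespace(
#       chat=ChatSession(),                      # .messages -> DataFrame
#       user={'name': ['Mimi'], 'likes': [[]]},  # column -> list-of-values mapping
#       results=pd.DataFrame({'score': [0.9]}),  # evaluation results
#   )
#   end_session(naomi)  # pushes all three datasets to their HF repos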

chat_messages = [
    {'role': 'user', 'content': 'Hello!'},
    {'role': 'assistant', 'content': 'Hi there! How can I assist you today?'},
    {'role': 'user', 'content': 'I have a question about my order.'},
    {'role': 'assistant', 'content': 'Sure! What would you like to know about your order?'},
    {'role': 'user', 'content': 'When will it be delivered?'},
    {'role': 'assistant', 'content': 'Let me check that for you. Can you provide me with your order number?'},
    {'role': 'user', 'content': 'My order number is 123456.'},
    {'role': 'assistant', 'content': 'Thank you! Your order is scheduled to be delivered on March 5th.'},
    {'role': 'user', 'content': 'Great! Thank you for your help.'},
    {'role': 'assistant', 'content': 'You’re welcome! If you have any more questions, feel free to ask.'},
    {'role': 'user', 'content': 'Will do! Have a nice day.'},
    {'role': 'assistant', 'content': 'You too! Take care!'}
]

@dataclass
class ChatMessage:
    role: str
    content: str
    # default_factory so each message gets its own creation timestamp (a plain
    # default would be evaluated once at class-definition time)
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    inference: Dict[str, Any] = field(default_factory=dict)

    def preprocess(self):
        # Example preprocessing: strip whitespace and convert to lowercase
        self.content = self.content.strip().lower()

    def collect_features(self):
        """ TODO:
            - connect to classifiers / pipeline
            - connect to agentDial
        """
        self.inference['positive'] = 0.05
        self.inference['negative'] = 0.05
        self.inference['neutral'] = 0.90
        self.inference['intent'] = 'greeting'
        self.inference['mood'] = 'neutral'

    def __post_init__(self):
        """ Workflow of inferencing tools. """
        self.preprocess()
        self.collect_features()
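
# Example: constructing a ChatMessage runs the full __post_init__ workflow, so
# the content is normalised and the placeholder inference scores are attached:
#
#   msg = ChatMessage(role='user', content='  Hello!  ')
#   msg.content    -> 'hello!'
#   msg.inference  -> {'positive': 0.05, 'negative': 0.05, 'neutral': 0.9,
#                      'intent': 'greeting', 'mood': 'neutral'}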

@dataclass
class ChatSession:
    _messages: List[ChatMessage] = field(default_factory=list)
    # default_factory so every session gets a fresh id
    session_id: str = field(default_factory=lambda: uuid4().hex)

    def __iter__(self):
        # Iterates only the role and content for tokenizing.
        for item in self._messages:
            yield {
                'role': item.role,
                'content': item.content
            }

    def __getitem__(self, index):
        """ Only returns the role and content for the requested index."""
        if -len(self._messages) <= index < len(self._messages):
            msg = self._messages[index]
            return {
                'role': msg.role,
                'content': msg.content
            }
        raise IndexError

    @property
    def messages(self):
        """ Returns dataframe. Includes inferenced features. """
        data = pd.DataFrame(self._messages)
        data['session_id'] = self.session_id
        return data

    def add_message(self, role: Literal['user', 'assistant', 'system'], content: str):
        """ Adds a message to the chat session. """
        message = ChatMessage(role=role, content=content)
        self._messages.append(message)
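
# Example: iteration and indexing expose only role/content (tokenizer-friendly),
# while the `messages` property returns the full DataFrame with inference columns:
#
#   session = ChatSession()
#   session.add_message('user', 'Hello!')
#   list(session)     -> [{'role': 'user', 'content': 'hello!'}]
#   session[0]        -> {'role': 'user', 'content': 'hello!'}
#   session.messages  -> DataFrame[role, content, timestamp, inference, session_id]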

@dataclass
class ProfileBase(ABC):
    def __post_init__(self):
        """ Base checks """
        if hasattr(self, 'name') and self.name:
            self.name = self.name.lower().capitalize()
        if hasattr(self, 'prompt'):
            prompt = PromptTemplate.from_template(self.prompt)
            self.prompt = prompt

@dataclass
class Agent(ProfileBase):
    """Setup Agent Profile or Adds Agent to Bot Family"""

    name: str
    prompt: str
    data: dict

    def system_prompt(self, candidate):
        try:
            main_user = load_main_user()
            prompt = self.prompt.invoke(
                input=dict(
                    user_name=main_user.name,
                    user_likes="\n".join(main_user.likes),
                    user_dislikes="\n".join(main_user.dislikes),
                    candidate_details=candidate.format_profile(),
                    **self.data
                )
            )
            print(f"Parsed prompt: {prompt}. Full input: \n{prompt.text}")

            return [{'role': 'system', 'content': prompt.text}]

        except Exception as e:
            print(e)
            raise
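
# Example sketch (assumption): the `prompt` string is compiled into a
# PromptTemplate in ProfileBase.__post_init__, so its placeholders should cover
# the keys supplied in system_prompt (user_name, user_likes, user_dislikes,
# candidate_details, plus everything in `data`). The values below are illustrative:
#
#   agent = Agent(
#       name='naomi',
#       prompt=("You are {agent_role} chatting with {user_name}.\n"
#               "Likes:\n{user_likes}\nDislikes:\n{user_dislikes}\n"
#               "Candidate:\n{candidate_details}"),
#       data={'agent_role': 'a technical interviewer'},
#   )
#   system_messages = agent.system_prompt(candidate)  # candidate: a Candidate instance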

@dataclass
class Contact(ProfileBase):
    """User's Metaclasses -- Social"""

    instagram: Optional[str] = None
    email: Optional[str] = None
    mobile: Optional[str] = None

@dataclass
class Biography:
    """User's Metaclasses -- Biography / FAQs"""

    dob: Optional[str] = None
    location: Optional[str] = None
    mbti_label: Optional[str] = None
    education: Optional[str] = None
    occupation: Optional[str] = None

@dataclass
class User(Biography, Contact):
    """User's Datahandler for account creation. Metaclass: Contact"""

    name: str = field(default_factory=str)
    likes: List[str] = field(default_factory=list)
    dislikes: List[str] = field(default_factory=list)

@dataclass
class Candidate(Contact, Biography):
    """Interviewing Candidate Accessor for Agents roleplaying as Interviewers."""

    name: str = field(default_factory=str)
    # default_factory so each candidate gets a fresh id
    id: str = field(default_factory=lambda: uuid4().hex)

    def format_profile(self):
        return "".join([f"{key}: {val}\n" for key, val in self.__dict__.items() if val is not None or key not in ('output_file_path', 'input_file_path', 'id')])

def new_user(**kwargs):
    """ Process inputs collected from frontend to backend. Returns Candidate. """

    contact_type = kwargs.get('contact_type', None)
    if contact_type is not None:
        # Map an integer contact_type to the corresponding Contact field name.
        contact = Contact.__match_args__[contact_type] if isinstance(contact_type, int) else contact_type
        kwargs.update({contact: kwargs.get('contact', None)})

    kwargs.pop('contact_type', None)
    kwargs.pop('contact', None)
    # Drop frontend-only keys (e.g. intake flags) that Candidate does not accept.
    kwargs = {key: val for key, val in kwargs.items() if key in Candidate.__dataclass_fields__}
    return Candidate(**kwargs)
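
# Note: `contact_type` may be given as an index into Contact.__match_args__
# (i.e. ('instagram', 'email', 'mobile')) or as the field name itself, so
# contact_type=1 with contact='mike@example.org' is stored as email='mike@example.org'.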

if __name__ == "__main__":
    # Example usage for chat session
    """
    chat_session = ChatSession()
    for msg in chat_messages:
        chat_session.add_message(msg['role'], msg['content'])
    print(chat_session.messages)
    """
    # user = load_main_user()
    # print(user)

    test_user = {
        'name': 'mike',
        'contact_type': 1,
        'contact': '[email protected]',
        'dob': '29/12/1800',
        'location': 'north korea',
        'intake_submission': True
    }
    candy = new_user(**test_user)
    print(candy)