Mimi committed
Commit 5ed7b6c · 1 Parent(s): 2f61f40

add new file

Files changed (4):
  1. .gitignore +1 -1
  2. agent.py +19 -20
  3. app.py +32 -26
  4. data_utils.py +256 -0
.gitignore CHANGED
@@ -131,4 +131,4 @@ dmypy.json
 
 .vscode/
 model_hugger.py
-*.key
+demo.ipynb
agent.py CHANGED
@@ -11,17 +11,16 @@ Return:
 
 
 import time
-from uuid import uuid4
+from data_utils import end_session, load_agent_from_hf, new_user
+
 from llama_cpp import Llama
 from llama_cpp.llama_tokenizer import LlamaHFTokenizer
 
 # default decoding params initiation
 SEED = 42
-
 MODEL_CARD = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
 MODEL_PATH = "Meta-Llama-3.1-8B-Instruct-Q3_K_XL.gguf"
 base_model_id = "meta-llama/Llama-3.1-8B-Instruct"
-
 new_chat_template = """{{- bos_token }}
 {%- if custom_tools is defined %}
 {%- set tools = custom_tools %}
@@ -108,46 +107,46 @@ new_chat_template = """{{- bos_token }}
 {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
 {%- endif %}"""
 
-datetime_format = '%Y-%m-%d %H:%M:%S'
-
-from datetime import datetime
-
 class Naomi:
     def __init__(self, **kwargs):
-        self.session_id = uuid4().hex
-        self.candidate = kwargs
-
+        # init dataclasses
+        self.user = new_user(**kwargs)
+        self.agent = load_agent_from_hf('Naomi')
        # load the model
        self.model = Llama.from_pretrained(
            repo_id=MODEL_CARD,
            filename=MODEL_PATH,
            tokenizer=LlamaHFTokenizer.from_pretrained(base_model_id)
        )
-
        self.model.tokenizer_.hf_tokenizer.chat_template = new_chat_template
+
        # load the agents prompts
+        sys_msg = self.agent.system_prompt(self.user)
        self.chat_history = self.model.tokenizer_.hf_tokenizer.apply_chat_template(
+            sys_msg,
+            tokenize=False
        )
 
-        self.timestamps = []
-
-    def invoke(self, history, **kwargs):
+    def respond(self, user_input: dict, **kwargs):
        """ Invoked during stream. """
        # user msg handling
-        self.timestamps += [datetime.now().strftime(datetime_format)]
-        format_user_input = self.model.tokenizer_.hf_tokenizer.apply_chat_template(history[-1], tokenize=False, add_generation_prompt=False)
+        format_user_input = self.model.tokenizer_.hf_tokenizer.apply_chat_template([user_input], tokenize=False, add_generation_prompt=False)
        self.chat_history += format_user_input
        # agent msg results + clean
        response = self.model(self.chat_history, **kwargs)
        output = "".join(response['choices'][0]['text'].split('\n\n')[1:])
        # update history
-        self.timestamps += [datetime.now().strftime(datetime_format)]
        self.chat_history += self.model.tokenizer_.hf_tokenizer.apply_chat_template([{'role': 'assistant', 'content': output}], tokenize=False, add_generation_prompt=False)
+
        return output
 
-    def respond(self, history, **kwargs):
+    @staticmethod
+    def gen(response):
        """ Generator that yields responses in chat sessions. """
-        response = self.invoke(history, **kwargs)
        for word in response.split():
            yield word + " "
-            time.sleep(0.05)
+            time.sleep(0.05)
+
+    def end(self, chat_messages):
+        self.chat = chat_messages
+        end_session(self)
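
Taken together, the agent.py changes move user and session bookkeeping out of the class and into data_utils: __init__ now builds the candidate with new_user and the agent profile with load_agent_from_hf, respond takes a single user message dict, gen streams a finished reply word by word, and end hands the transcript to end_session. A minimal sketch of the intended call flow, assuming an intake dict shaped like the one in data_utils' own test block (the contact address and decoding kwargs below are illustrative, not part of the commit):

    from agent import Naomi

    # Hypothetical intake values; field names follow the committed intake_form list.
    intake = {
        'name': 'mike',
        'contact_type': 1,              # index into Contact.__match_args__
        'contact': 'mike@example.com',  # placeholder address, not from the commit
        'dob': '29/12/1800',
        'location': 'north korea',
        'intake_submission': True,
    }

    naomi = Naomi(**intake)   # builds Candidate + Agent profile, loads the GGUF model
    reply = naomi.respond({'role': 'user', 'content': 'Hello!'}, max_tokens=128)

    # gen() is the word-by-word generator the Streamlit UI streams from.
    for chunk in Naomi.gen(reply):
        print(chunk, end='', flush=True)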
app.py CHANGED
@@ -11,32 +11,32 @@
 import time
 import streamlit as st
 from agent import Naomi
+from data_utils import ChatSession, Contact
+
+contact_options = Contact.__match_args__
+intake_form = [
+    'name',
+    'contact_type',
+    'contact',
+    'dob',
+    'location',
+    'intake_submission'
+]
 
 # Title of the app
 st.title("Chatbot Naomi")
 
 print('Initial Session state', st.session_state)
-contact_options = ['Instagram', 'Email', 'Number']
-intake_form = [
-    'candidate_name',
-    'candidate_contact_type',
-    'candidate_contact',
-    'candidate_dob',
-    'candidate_location',
-    'intake_submission'
-]
-if "messages" not in st.session_state:
-    st.session_state.messages = []
 
 @st.dialog('Intake form', width='large')
 def open_intake_form():
     st.markdown('Fill in your detaisl below to start chat session :)')
-    name = st.text_input("Enter your name", key='candidate_name')
+    name = st.text_input("Enter your name", key='name')
     contact_col_1, contact_col_2 = st.columns(spec=[0.3, 0.7], vertical_alignment='center')
-    contact_col_1.selectbox("Select contact option", contact_options, key='candidate_contact_type')
-    contact_col_2.text_input('Enter your username', key='candidate_contact')
-    dob = st.date_input("When is your birthday?", key='candidate_dob')
-    location = st.text_input('Enter your location', key='candidate_location')
+    contact_col_1.selectbox("Select contact option", contact_options, key='contact_type')
+    contact_col_2.text_input('Enter your username', key='contact')
+    dob = st.date_input("When is your birthday?", key='dob')
+    location = st.text_input('Enter your location', key='location')
     #button = st.button('Submit', use_container_width=True, type='primary')
     # after the button is clicked the page automatically reruns and the workflow starts from the beginning
 
@@ -49,7 +49,10 @@ def open_intake_form():
 def open_chat_window(**kwargs):
     # adds to current state (deletes if doesnt)
     st.session_state.update(kwargs)
-    naomi = Naomi(**kwargs)
+    st.session_state.naomi = Naomi(**kwargs)
+
+    if "messages" not in st.session_state:
+        st.session_state.messages = ChatSession()
 
     st.markdown('Welcome to the chat!')
     msgbox = st.container(height=400, border=False)
@@ -62,13 +65,12 @@ def open_chat_window(**kwargs):
         # Add user message to chat history
         print(f'State: {st.session_state}\nUser inserted message: {user_input}')
 
-        st.session_state.messages.append({"role": "user", "content": user_input})
+        st.session_state.messages.add_message(role='user', content=user_input)
         # Display user message in chat message container
         msgbox.chat_message("user").write(user_input)
-        response = msgbox.chat_message('assistant').write_stream(naomi.respond(st.session_state.messages))
-
-        # Append assistant's response to the messages history
-        st.session_state.messages.append({"role": "assistant", "content": response})
+        response = st.session_state.naomi.invoke(st.session_state.messages[-1])
+        msgbox.chat_message('assistant').write_stream(st.session_state.naomi.gen(response))
+        st.session_state.messages.add_message(role='assistant', content=response)
 
     undo_button, reset_button = st.columns(2)
     if undo_button.button('Undo message', use_container_width=True, type='secondary'):
@@ -82,10 +84,14 @@ def main():
     if st.button('Start chat session . . .', type='primary', key='open_intake'):
         open_intake_form()
     else:
-        st.session_state['candidate_name'] = st.session_state['candidate_name'].lower().capitalize()
-        open_chat_window(**st.session_state)
-
-    # render_agent_mood()
+        if 'end_chat' not in st.session_state:
+            st.session_state['name'] = st.session_state['name'].lower().capitalize()
+            open_chat_window(**st.session_state)
+        else:
+            if 'naomi' not in st.session_state:
+                st.rerun()
+            else:
+                st.session_state.naomi.end()
 
 if __name__ == '__main__':
     main()
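
One detail worth noting in the app.py changes: the contact selectbox options now come from Contact.__match_args__, the tuple of field names that the dataclass machinery generates, so the intake form stays in sync with whatever contact fields data_utils defines. A quick standalone sketch of that behaviour (field names as committed, base class omitted for brevity):

    from dataclasses import dataclass
    from typing import Optional

    # Dataclasses expose their positional field names via __match_args__;
    # app.py reuses that tuple as the selectbox options.
    @dataclass
    class Contact:
        instagram: Optional[str] = None
        email: Optional[str] = None
        mobile: Optional[str] = None

    print(Contact.__match_args__)  # ('instagram', 'email', 'mobile')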
data_utils.py ADDED
@@ -0,0 +1,256 @@
+
+from abc import ABC
+import pandas as pd
+from enum import Enum
+from uuid import uuid4
+from datetime import datetime
+from dataclasses import dataclass, field
+from langchain_core.prompts import PromptTemplate
+from datasets import load_dataset, DatasetDict, Dataset, concatenate_datasets
+
+from typing import List, Dict, Any, Literal, Optional
+
+username = 'mimipynb'
+
+class HFConfig(Enum):
+    chat = username + '/naomi-dialogue'
+    users = username + '/naomi-users'
+    results = username + '/naomi-eval'
+    hub = username + '/agentNet'
+    pepe = username + '/agentNetHuman'
+
+def load_agent_from_hf(agent_name):
+    """ Loads agent from HF """
+
+    botnet = load_dataset(HFConfig.hub.value, token=True, split='train').to_pandas()
+    chatbot = dict(zip(botnet.columns, *botnet[botnet['name'] == agent_name].values))
+    chatbot.pop('agent_type')
+
+    return Agent(**chatbot)
+
+def load_main_user():
+    """ Loads main user from HF. To be removed / changed. """
+    pepes = load_dataset(HFConfig.pepe.value, token=True, split='train').to_pandas()
+    pepe = dict(zip(pepes.columns, *pepes[pepes['user_type'] == 'main'].values))
+    pepe.pop('user_type')
+    pepe.pop('relation_type')
+    pepe.pop('repo_id')
+    pepe.pop('input_file_path')
+    pepe.pop('output_file_path')
+
+    return User(**pepe)
+
+def uploader(repo_id, new_data):
+    """ Appends new streaming sessions to HF space. """
+
+    original = load_dataset(repo_id=repo_id, token=True)
+    if isinstance(original, DatasetDict):
+        original = original['train']
+
+    concat = concatenate_datasets([original, new_data])
+
+    if len(concat) != len(original) + len(new_data):
+        raise ValueError(f"Expected concatenated data to be to be the sum of {len(original)} and {len(new_data)} but received {len(concat)} ")
+
+    concat.push_to_hub(
+        repo_id=repo_id,
+        private=True
+    )
+    print(f"Finished pushing to {repo_id}")
+
+def end_session(naomi):
+    """ Data Handlers to run end of chat session. """
+
+    chat = naomi.chat.messages
+    user = naomi.user
+    results = naomi.results
+    uploader(HFConfig.chat.value, Dataset.from_pandas(chat))
+    uploader(HFConfig.users.value, Dataset.from_dict(user))
+    uploader(HFConfig.results.value, Dataset.from_pandas(results))
+
+chat_messages = [
+    {'role': 'user', 'content': 'Hello!'},
+    {'role': 'assistant', 'content': 'Hi there! How can I assist you today?'},
+    {'role': 'user', 'content': 'I have a question about my order.'},
+    {'role': 'assistant', 'content': 'Sure! What would you like to know about your order?'},
+    {'role': 'user', 'content': 'When will it be delivered?'},
+    {'role': 'assistant', 'content': 'Let me check that for you. Can you provide me with your order number?'},
+    {'role': 'user', 'content': 'My order number is 123456.'},
+    {'role': 'assistant', 'content': 'Thank you! Your order is scheduled to be delivered on March 5th.'},
+    {'role': 'user', 'content': 'Great! Thank you for your help.'},
+    {'role': 'assistant', 'content': 'You’re welcome! If you have any more questions, feel free to ask.'},
+    {'role': 'user', 'content': 'Will do! Have a nice day.'},
+    {'role': 'assistant', 'content': 'You too! Take care!'}
+]
+
+@dataclass
+class ChatMessage:
+    role: str
+    content: str
+    timestamp: str = field(default=datetime.now().isoformat())
+    inference: Dict[str, Any] = field(default_factory=dict)
+
+    def preprocess(self):
+        # Example preprocessing: strip whitespace and convert to lowercase
+        self.content = self.content.strip().lower()
+
+    def collect_features(self):
+        """ TODO:
+        - connect to classifiers / pipeline
+        - connect to agentDial
+        """
+        self.inference['positive'] = 0.05
+        self.inference['negative'] = 0.05
+        self.inference['neutral'] = 0.90
+        self.inference['intent'] = 'greeting'
+        self.inference['mood'] = 'neutral'
+
+    def __post_init__(self):
+        """ Workflow of inferencing tools. """
+        self.preprocess()
+        self.collect_features()
+
+@dataclass
+class ChatSession:
+    _messages: List[ChatMessage] = field(default_factory=list)
+    session_id: str = field(default=uuid4().hex)
+
+    def __iter__(self):
+        # Iterates only the role and content for tokenizing.
+        for item in self._messages:
+            yield {
+                'role': item.role,
+                'content': item.content
+            }
+
+    def __getitem__(self, index):
+        """ Only returns the role and content for the requested index."""
+        if 0 <= index < len(self._messages):
+            msg = self._messages[index]
+            return {
+                'role': msg.role,
+                'content': msg.content
+            }
+        raise IndexError
+
+    @property
+    def messages(self):
+        """ Returns dataframe. Includes inferenced features. """
+        data = pd.DataFrame(self._messages)
+        data['session_id'] = self.session_id
+        return data
+
+    def add_message(self, role: Literal['user', 'role', 'system'], content: str):
+        """ Adds messages to the chat sessions. """
+        message = ChatMessage(role=role, content=content)
+        self._messages.append(message)
+
+@dataclass
+class ProfileBase(ABC):
+    def __post_init__(self):
+        """ Base checks """
+        if hasattr(self, 'name') and self.name:
+            self.name = self.name.lower().capitalize()
+        if hasattr(self, 'prompt'):
+            prompt = PromptTemplate.from_template(self.prompt)
+            self.prompt = prompt
+
+@dataclass
+class Agent(ProfileBase):
+    """Setup Agent Profile or Adds Agent to Bot Family"""
+
+    name: str
+    prompt: str
+    data: dict
+
+    def system_prompt(self, candidate):
+        try:
+            main_user = load_main_user()
+            prompt = self.prompt.invoke(
+                input=dict(
+                    user_name=main_user.name,
+                    user_likes="\n".join(main_user.likes),
+                    user_dislikes="\n".join(main_user.dislikes),
+                    candidate_details=candidate.format_profile(),
+                    **self.data
+                )
+            )
+            print(f"Parsed prompt: {prompt}. Full input: \n{prompt.text}")
+
+            return [{'role': 'system', 'content': prompt.text}]
+
+        except Exception as e:
+            print(e)
+            raise
+
+@dataclass
+class Contact(ProfileBase):
+    """User's Metaclasses -- Social"""
+
+    instagram: Optional[str] = None
+    email: Optional[str] = None
+    mobile: Optional[str] = None
+
+@dataclass
+class Biography:
+    """User's Metaclasses -- Biography / FAQs"""
+
+    dob: Optional[str] = None
+    location: Optional[str] = None
+    mbti_label: Optional[str] = None
+    education: Optional[str] = None
+    occupation: Optional[str] = None
+
+@dataclass
+class User(Biography, Contact):
+    """User's Datahandler for account creation. Metaclass: Contact"""
+
+    name: str = field(default_factory=str)
+    likes: List[str] = field(default_factory=list)
+    dislikes: List[str] = field(default_factory=list)
+
+@dataclass
+class Candidate(Contact, Biography):
+    """Interviewing Candidate Accessor for Agents roleplaying as Interviewers."""
+
+    name: str = field(default=str)
+    id: str = field(default=uuid4().hex)
+
+    def format_profile(self):
+        return "".join([f"{key}: {val}\n" for key, val in self.__dict__.items() if val is not None or key not in ('output_file_path', 'input_file_path', 'id')])
+
+def new_user(**kwargs):
+    """ Process inputs collected from frontend to backend. Returns Candidate. """
+
+    contact_type = kwargs.get('contact_type', None)
+    if contact_type is not None:
+        contact = Contact.__match_args__[contact_type] if isinstance(contact_type, int) else contact_type
+        kwargs.update({contact: kwargs.get('contact', None)})
+
+    kwargs.pop('contact_type')
+    kwargs.pop('contact')
+    kwargs.pop('intake_submission')
+
+    return Candidate(**kwargs)
+
+if __name__ == "__main__":
+    # Example usage for chat session
+    """
+    chat_session = ChatSession()
+    for msg in chat_messages:
+        chat_session.add_message(msg['role'], msg['content'])
+    print(chat_session.messages)
+    """
+    # user = load_main_user()
+    # print(user)
+
+    test_user = {
+        'name': 'mike',
+        'contact_type': 1,
+        'contact': '[email protected]',
+        'dob': '29/12/1800',
+        'location': 'north korea',
+        'intake_submission': True
+    }
+    candy = new_user(**test_user)
+    print(candy)
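
Beyond the __main__ block above, the ChatSession/ChatMessage pair is the piece the rest of the commit leans on: add_message wraps each turn in a ChatMessage (which runs preprocess and the stubbed collect_features on construction), iteration yields plain role/content dicts for the tokenizer, and the messages property flattens everything into a session-tagged DataFrame for uploader. A small local walk-through under those assumptions (no Hub calls, sample text made up):

    from data_utils import ChatSession

    session = ChatSession()
    session.add_message(role='user', content='Hello!')
    session.add_message(role='assistant', content='Hi there! How can I assist you today?')

    # Iteration yields only {'role', 'content'} dicts, ready for apply_chat_template.
    print(list(session))

    # .messages returns a DataFrame (role, content, timestamp, inference features)
    # tagged with session_id; end_session later pushes it to the naomi-dialogue repo.
    print(session.messages)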