Spaces:

lesimoes
/

chatbot-mistral

Sleeping

App Files Files Community

lesimoes committed on Feb 8

Commit

112182e

1 Parent(s): 15be47c

add knowledge base

Browse files

Files changed (4) hide show

__pycache__/knowledge_base.cpython-312.pyc +0 -0
app.py +30 -15
database.txt +0 -0
knowledge_base.py +29 -0

__pycache__/knowledge_base.cpython-312.pyc ADDED Viewed

Binary file (1.48 kB). View file

app.py CHANGED Viewed

@@ -4,26 +4,34 @@ import streamlit as st
 from transformers import pipeline
 from langchain_core.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser
-# Chat parameters
-first_ia_message = "Hello, there! How can I help you today?"
-system_message = "You are a friendly AI conversing with a human user."
-text_placeholder = "Enter your text here."
-text_waiting_ai_response = "Thinking..."
-max_response_length = 256
-reset_button_label = "Reset Chat History"
 # Models and Pipeline
 model_id="mistralai/Mistral-7B-Instruct-v0.3"
 translation_model_id = "Helsinki-NLP/opus-mt-tc-big-en-pt"
 translation_pipeline = pipeline(
     "translation_en_to_pt",
     model=translation_model_id,
     token=os.getenv("HF_TOKEN")
 )
-def get_llm_hf_inference(model_id=model_id, max_new_tokens=128, temperature=0.1):
     llm = HuggingFaceEndpoint(
         repo_id=model_id,
         max_new_tokens=max_new_tokens,
@@ -38,9 +46,9 @@ def translate_to_portuguese(text):
     return translation[0]['translation_text']
 # Configure the Streamlit app
-st.set_page_config(page_title="Personal ChatBot", page_icon="🤗")
-st.title("Personal ChatBot")
-st.markdown(f"* A simple chatbot with {model_id} and {translation_model_id}.*")
 # Initialize session state for avatars
 if "avatars" not in st.session_state:
@@ -77,24 +85,31 @@ if "chat_history" not in st.session_state or reset_history:
     st.session_state.chat_history = [{"role": "assistant", "content": st.session_state.starter_message}]
 def get_response(system_message, chat_history, user_text,
-                 eos_token_id=['User'], max_new_tokens=256, get_llm_hf_kws={}):
     # Set up model with token and temperature
-    hf = get_llm_hf_inference(max_new_tokens=max_new_tokens, temperature=0.1)
     # Create the prompt template
     prompt = PromptTemplate.from_template(
         (
             "[INST] {system_message}"
             "\nCurrent Conversation:\n{chat_history}\n\n"
             "\nUser: {user_text}.\n [/INST]"
             "\nAI:"
         )
     )
-    # Response template
     chat = prompt | hf.bind(skip_prompt=True) | StrOutputParser(output_key='content')
-    response = chat.invoke(input=dict(system_message=system_message, user_text=user_text, chat_history=chat_history))
     response = response.split("AI:")[-1]
     response = translate_to_portuguese(response)
     chat_history.append({'role': 'user', 'content': user_text})

 from transformers import pipeline
 from langchain_core.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser
+from knowledge_base import load_knowledge_base, format_knowledge_base
+# Load database
+knowledge = load_knowledge_base("database.txt")
+knowledge_context = format_knowledge_base(knowledge)
 # Models and Pipeline
 model_id="mistralai/Mistral-7B-Instruct-v0.3"
 translation_model_id = "Helsinki-NLP/opus-mt-tc-big-en-pt"
+# Chat parameters
+first_ia_message = "Olá, quais são os seus sintomas?"
+system_message = "You are a doctor who will help, based on the symptoms, and will give a diagnosis in Brazilian Portuguese. Your answer should be direct, simple and short, you can even ask a question to provide a more accurate answer. You should ask only about health. You should answer only questions about health."
+text_placeholder = "Enter your text here."
+text_waiting_ai_response = "Pensando..."
+max_response_length = 256
+reset_button_label = "Reset Chat History"
+chatbot_title = "ChatBot Sintomas"
+chatbot_description = f"* Um chatbot de sintomas que usa os modelos {model_id} e {translation_model_id}.*"
+temperature = 0.1
 translation_pipeline = pipeline(
     "translation_en_to_pt",
     model=translation_model_id,
     token=os.getenv("HF_TOKEN")
 )
+def get_llm_hf_inference(model_id=model_id, max_new_tokens=128, temperature=temperature):
     llm = HuggingFaceEndpoint(
         repo_id=model_id,
         max_new_tokens=max_new_tokens,
     return translation[0]['translation_text']
 # Configure the Streamlit app
+st.set_page_config(page_title=chatbot_title, page_icon="🤗")
+st.title(chatbot_title)
+st.markdown(chatbot_description)
 # Initialize session state for avatars
 if "avatars" not in st.session_state:
     st.session_state.chat_history = [{"role": "assistant", "content": st.session_state.starter_message}]
 def get_response(system_message, chat_history, user_text,
+                 eos_token_id=['User'], max_new_tokens=max_response_length, get_llm_hf_kws={}):
     # Set up model with token and temperature
+    hf = get_llm_hf_inference(max_new_tokens=max_new_tokens, temperature=temperature)
     # Create the prompt template
     prompt = PromptTemplate.from_template(
         (
             "[INST] {system_message}"
+            "{knowledge_context}\n"
             "\nCurrent Conversation:\n{chat_history}\n\n"
             "\nUser: {user_text}.\n [/INST]"
             "\nAI:"
         )
     )
+    # Include knowledge database
     chat = prompt | hf.bind(skip_prompt=True) | StrOutputParser(output_key='content')
+    response = chat.invoke(input={
+        "system_message": system_message,
+        "knowledge_context": knowledge_context,
+        "user_text": user_text,
+        "chat_history": chat_history
+    })
     response = response.split("AI:")[-1]
     response = translate_to_portuguese(response)
     chat_history.append({'role': 'user', 'content': user_text})

database.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

knowledge_base.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import re
def load_knowledge_base(caminho):
    """Load a symptom -> diagnosis mapping from a plain-text knowledge base.

    The file is read as UTF-8 and split into entries on blank lines. Within
    an entry, a line starting with ``Sintoma:`` supplies the symptom (stored
    lower-cased) and a line starting with ``Diagnóstico:`` supplies the
    diagnosis. Entries missing either field are ignored. If the same symptom
    appears more than once, the last entry wins.

    Args:
        caminho: Path of the knowledge-base text file.

    Returns:
        dict mapping each lower-cased symptom to its diagnosis text; empty
        when the file contains no complete entry.

    Raises:
        OSError: If the file cannot be opened.
    """
    base_de_conhecimento = {}
    with open(caminho, 'r', encoding='utf-8') as arquivo:
        conteudo = arquivo.read()

    # Entries are separated by one or more blank (or whitespace-only) lines.
    for entrada in re.split(r'\n\s*\n', conteudo):
        sintoma = None
        diagnostico = None
        for linha in entrada.strip().split('\n'):
            # Tolerate indentation / trailing spaces around each field line.
            linha = linha.strip()
            if linha.startswith("Sintoma:"):
                sintoma = linha.split(":", 1)[1].strip().lower()
            elif linha.startswith("Diagnóstico:"):
                # Without this branch no entry could ever be stored, because
                # the guard below requires both fields to be present.
                diagnostico = linha.split(":", 1)[1].strip()
        if sintoma and diagnostico:
            base_de_conhecimento[sintoma] = diagnostico
    return base_de_conhecimento
def format_knowledge_base(knowledge):
    """Render the knowledge base as a text block for inclusion in a prompt.

    Args:
        knowledge: Either a dict mapping symptom -> content (the shape
            produced by ``load_knowledge_base``), or an iterable of dicts
            each holding ``'sintoma'`` and ``'content'`` keys. Any falsy
            value (``None``, ``{}``, ``[]``) means "no knowledge".

    Returns:
        An empty string when there is no knowledge; otherwise a header line
        followed by one "Sintoma ... / Conteúdo: ..." paragraph per entry.

    Raises:
        KeyError: If a dict item in an iterable lacks ``'sintoma'`` or
            ``'content'``.
    """
    if not knowledge:
        return ""

    if isinstance(knowledge, dict):
        # Iterating a dict directly yields only its string keys, which cannot
        # be indexed by 'sintoma'/'content'; walk the key/value pairs instead.
        pares = knowledge.items()
    else:
        pares = ((item['sintoma'], item['content']) for item in knowledge)

    blocos = [
        f"Sintoma {sintoma}\nConteúdo: {conteudo}\n\n"
        for sintoma, conteudo in pares
    ]
    return "Base de Conhecimento:\n" + "".join(blocos)