lesimoes committed
Commit 112182e · 1 Parent(s): 15be47c

add knowledge base

__pycache__/knowledge_base.cpython-312.pyc ADDED
Binary file (1.48 kB)
 
app.py CHANGED
@@ -4,26 +4,34 @@ import streamlit as st
 from transformers import pipeline
 from langchain_core.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser
+from knowledge_base import load_knowledge_base, format_knowledge_base
 
-# Chat parameters
-first_ia_message = "Hello, there! How can I help you today?"
-system_message = "You are a friendly AI conversing with a human user."
-text_placeholder = "Enter your text here."
-text_waiting_ai_response = "Thinking..."
-max_response_length = 256
-reset_button_label = "Reset Chat History"
+# Load database
+knowledge = load_knowledge_base("database.txt")
+knowledge_context = format_knowledge_base(knowledge)
 
 # Models and Pipeline
 model_id = "mistralai/Mistral-7B-Instruct-v0.3"
 translation_model_id = "Helsinki-NLP/opus-mt-tc-big-en-pt"
 
+# Chat parameters
+first_ia_message = "Olá, quais são os seus sintomas?"
+system_message = "You are a doctor who, based on the symptoms, will give a diagnosis in Brazilian Portuguese. Your answer should be direct, simple and short; you may ask a follow-up question to give a more accurate answer. You should ask and answer questions about health only."
+text_placeholder = "Enter your text here."
+text_waiting_ai_response = "Pensando..."
+max_response_length = 256
+reset_button_label = "Reset Chat History"
+chatbot_title = "ChatBot Sintomas"
+chatbot_description = f"*Um chatbot de sintomas que usa os modelos {model_id} e {translation_model_id}.*"
+temperature = 0.1
+
 translation_pipeline = pipeline(
     "translation_en_to_pt",
     model=translation_model_id,
     token=os.getenv("HF_TOKEN")
 )
 
-def get_llm_hf_inference(model_id=model_id, max_new_tokens=128, temperature=0.1):
+def get_llm_hf_inference(model_id=model_id, max_new_tokens=128, temperature=temperature):
     llm = HuggingFaceEndpoint(
         repo_id=model_id,
         max_new_tokens=max_new_tokens,
@@ -38,9 +46,9 @@ def translate_to_portuguese(text):
     return translation[0]['translation_text']
 
 # Configure the Streamlit app
-st.set_page_config(page_title="Personal ChatBot", page_icon="🤗")
-st.title("Personal ChatBot")
-st.markdown(f"* A simple chatbot with {model_id} and {translation_model_id}.*")
+st.set_page_config(page_title=chatbot_title, page_icon="🤗")
+st.title(chatbot_title)
+st.markdown(chatbot_description)
 
 # Initialize session state for avatars
 if "avatars" not in st.session_state:
@@ -77,24 +85,31 @@ if "chat_history" not in st.session_state or reset_history:
     st.session_state.chat_history = [{"role": "assistant", "content": st.session_state.starter_message}]
 
 def get_response(system_message, chat_history, user_text,
-                 eos_token_id=['User'], max_new_tokens=256, get_llm_hf_kws={}):
+                 eos_token_id=['User'], max_new_tokens=max_response_length, get_llm_hf_kws={}):
     # Set up model with token and temperature
-    hf = get_llm_hf_inference(max_new_tokens=max_new_tokens, temperature=0.1)
+    hf = get_llm_hf_inference(max_new_tokens=max_new_tokens, temperature=temperature)
 
+
     # Create the prompt template
    prompt = PromptTemplate.from_template(
         (
             "[INST] {system_message}"
+            "{knowledge_context}\n"
             "\nCurrent Conversation:\n{chat_history}\n\n"
             "\nUser: {user_text}.\n [/INST]"
             "\nAI:"
         )
     )
 
-    # Response template
+    # Include knowledge database
     chat = prompt | hf.bind(skip_prompt=True) | StrOutputParser(output_key='content')
 
-    response = chat.invoke(input=dict(system_message=system_message, user_text=user_text, chat_history=chat_history))
+    response = chat.invoke(input={
+        "system_message": system_message,
+        "knowledge_context": knowledge_context,
+        "user_text": user_text,
+        "chat_history": chat_history
+    })
     response = response.split("AI:")[-1]
     response = translate_to_portuguese(response)
     chat_history.append({'role': 'user', 'content': user_text})
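
For reference, once PromptTemplate fills its four variables, the text sent to the model looks roughly like this (the knowledge lines and user message are illustrative placeholders, and the system message is truncated; note that {knowledge_context} lands immediately after the system message, and that chat_history is interpolated as a stringified list of role/content dicts):

[INST] You are a doctor who, based on the symptoms, will give a diagnosis in Brazilian Portuguese. ...Base de Conhecimento:
Sintoma: febre alta e dor no corpo
Diagnóstico: possível quadro gripal

Current Conversation:
[{'role': 'assistant', 'content': 'Olá, quais são os seus sintomas?'}]

User: estou com febre e dor no corpo.
 [/INST]
AI: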
database.txt ADDED
The diff for this file is too large to render.
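
The diff isn't rendered, but the parser added in knowledge_base.py below splits this file on blank lines and looks for "Sintoma:" and "Diagnóstico:" prefixes, so each entry presumably has this shape (an illustrative sketch, not the actual file contents):

Sintoma: febre alta e dor no corpo
Diagnóstico: possível quadro gripal; procure atendimento se persistir

Sintoma: dor de cabeça com sensibilidade à luz
Diagnóstico: possível enxaqueca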
 
knowledge_base.py ADDED
@@ -0,0 +1,30 @@
+import re
+
+def load_knowledge_base(caminho):
+    base_de_conhecimento = {}
+    with open(caminho, 'r', encoding='utf-8') as arquivo:
+        conteudo = arquivo.read()
+    # Split the file into entries separated by blank lines
+    entradas = re.split(r'\n\s*\n', conteudo)
+    for entrada in entradas:
+        linhas = entrada.strip().split('\n')
+        sintoma = None
+        diagnostico = None
+        for linha in linhas:
+            if linha.startswith("Sintoma:"):
+                sintoma = linha.split(":", 1)[1].strip().lower()
+            elif linha.startswith("Diagnóstico:"):
+                diagnostico = linha.split(":", 1)[1].strip()
+        if sintoma and diagnostico:
+            base_de_conhecimento[sintoma] = diagnostico
+    return base_de_conhecimento
+
+def format_knowledge_base(knowledge):
+    if not knowledge:
+        return ""
+
+    # Render the dict returned by load_knowledge_base as a prompt block
+    formatted = "Base de Conhecimento:\n"
+    for sintoma, diagnostico in knowledge.items():
+        formatted += f"Sintoma: {sintoma}\nDiagnóstico: {diagnostico}\n\n"
+    return formatted
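
A minimal round-trip of the two functions, as app.py uses them (assuming a database.txt in the entry format sketched above):

# Usage sketch: build the "Base de Conhecimento" block that app.py injects into the prompt.
from knowledge_base import load_knowledge_base, format_knowledge_base

knowledge = load_knowledge_base("database.txt")   # e.g. {'febre alta e dor no corpo': 'possível quadro gripal; ...'}
knowledge_context = format_knowledge_base(knowledge)
print(knowledge_context)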