added app files
- .DS_Store +0 -0
- .gitattributes +1 -0
- Dockerfile +15 -0
- app.py +73 -0
- backend.py +138 -0
- documents/.DS_Store +0 -0
- documents/blockchain/.DS_Store +0 -0
- documents/blockchain/Blockchain.pdf +3 -0
- documents/metaverso/.DS_Store +0 -0
- documents/metaverso/ConvMetaverse.pdf +3 -0
- documents/payment/.DS_Store +0 -0
- documents/payment/Payments.pdf +3 -0
- requirements.txt +10 -0
.DS_Store
ADDED
Binary file (6.15 kB)
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
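Note: with `*.pdf` now tracked by Git LFS, the three PDFs added below are stored as small pointer files, and the actual binaries are only present once the LFS content is fetched. A minimal sketch (not part of this commit; the helper name is hypothetical) of how the backend could guard against un-fetched pointers before handing a path to `PdfReader`:

```python
# Hypothetical guard (assumption, not in this commit): skip Git LFS pointer
# files that were never materialized, since PdfReader cannot parse them.
def is_lfs_pointer(filepath: str) -> bool:
    with open(filepath, "rb") as f:
        head = f.read(64)
    return head.startswith(b"version https://git-lfs.github.com/spec/v1")
```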
Dockerfile
ADDED
@@ -0,0 +1,15 @@
+FROM python:3.9
+
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+
+WORKDIR /app
+
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+RUN ollama run gemma2:2b
+
+COPY --chown=user . /app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
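The `RUN ollama run gemma2:2b` step assumes the Ollama binary is already present, which the plain `python:3.9` base image does not provide, so the model would more likely need to be fetched at runtime against a running Ollama server. A minimal sketch under that assumption, using the `ollama` Python client listed in requirements.txt (the helper below is hypothetical, not part of this commit):

```python
# Hypothetical startup helper (assumption: an Ollama server is reachable at runtime).
import ollama

def ensure_model(name: str = "gemma2:2b") -> None:
    # Download the model if it is not already available locally.
    ollama.pull(name)

if __name__ == "__main__":
    ensure_model()
```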
app.py
ADDED
@@ -0,0 +1,73 @@
+import streamlit as st
+from backend import get_answer, get_clarification_answer, load_documents, prepare_documents
+import logging
+
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+
+st.title("Chatbot Osservatori")
+
+# Initialize session state if not already set
+if 'step' not in st.session_state:
+    st.session_state.step = 1
+    st.session_state.user_input = ""
+    st.session_state.clarification = ""
+    st.session_state.clarify_answer = ""
+    st.session_state.answer = ""
+    st.session_state.sources = ""
+    st.session_state.db = None
+
+# Display the appropriate UI based on the step
+if st.session_state.step == 1:
+    user_input = st.text_input("Chiedici qualcosa:", key='user_input_input')
+    if st.button("Invia", key='button_step1'):
+        if user_input:
+            st.session_state.user_input = user_input
+            st.session_state.clarify_answer = get_clarification_answer(user_input)
+            st.session_state.step = 2
+
+if st.session_state.step == 2:
+    st.write(st.session_state.clarify_answer)
+    clarification_input = st.text_input("", key='clarification_input', value=st.session_state.clarification)
+
+    if st.button("Invia", key='button_step2'):
+        st.session_state.clarification = clarification_input
+        documents = None
+        with st.spinner('Caricando i documenti rilevanti...'):
+            if "blockchain" in clarification_input.lower():
+                documents = load_documents('documents/blockchain')
+            elif "metaverse" in clarification_input.lower():
+                documents = load_documents('documents/metaverso')
+            elif "payment" in clarification_input.lower():
+                documents = load_documents('documents/payment')
+            else:
+                st.write('Per favore, usa il nome corretto degli osservatori che vuoi interrogare. Opzioni valide: "Blockchain", "Metaverse", "Payment".')
+                documents = None
+
+        if documents:
+            with st.spinner('Preparando i documenti...'):
+                st.session_state.db = prepare_documents(documents)
+            with st.spinner('Interrogando ollama...'):
+                st.session_state.answer, st.session_state.sources = get_answer(st.session_state.user_input, st.session_state.db)
+            st.session_state.step = 3
+
+if st.session_state.step == 3:
+    st.write(st.session_state.answer)
+    st.write("Fonti: " + st.session_state.sources)
+
+    new_question = st.text_input("Fai un'altra domanda:", key='new_question_input')
+    if st.button("Invia", key='button_step3'):
+        if new_question:
+            st.session_state.user_input = new_question
+            with st.spinner('Interrogando ollama...'):
+                st.session_state.answer, st.session_state.sources = get_answer(new_question, st.session_state.db)
+            st.rerun()
+
+    if st.button("Reset", key='button_reset'):
+        st.session_state.step = 1
+        st.session_state.user_input = ""
+        st.session_state.clarification = ""
+        st.session_state.clarify_answer = ""
+        st.session_state.answer = ""
+        st.session_state.sources = ""
+        st.session_state.db = None
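app.py is a Streamlit script, typically launched with `streamlit run app.py`, so the uvicorn CMD in the Dockerfile above would not find an ASGI `app` object here. The step-2 routing is a plain keyword match over the user's clarification; a hedged refactoring sketch of the same lookup as a table (names are illustrative, not part of this commit):

```python
from typing import Optional

# Illustrative mapping of observatory keywords to document directories,
# mirroring the if/elif chain in app.py.
OBSERVATORIES = {
    "blockchain": "documents/blockchain",
    "metaverse": "documents/metaverso",
    "payment": "documents/payment",
}

def resolve_directory(clarification: str) -> Optional[str]:
    text = clarification.lower()
    for keyword, directory in OBSERVATORIES.items():
        if keyword in text:
            return directory
    return None
```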
backend.py
ADDED
@@ -0,0 +1,138 @@
+import os
+import logging
+from concurrent.futures import ThreadPoolExecutor
+from pypdf import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+import ollama
+import subprocess
+import time
+from dotenv import load_dotenv
+
+logging.basicConfig(
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    level=logging.DEBUG
+)
+logger = logging.getLogger(__name__)
+
+load_dotenv()
+
+logger.debug("Environment variables loaded.")
+
+def load_single_document(filepath):
+    if filepath.endswith('.pdf'):
+        with open(filepath, 'rb') as file:
+            pdf_reader = PdfReader(file)
+            text = " ".join([page.extract_text() for page in pdf_reader.pages])
+    elif filepath.endswith('.txt'):
+        with open(filepath, 'r', encoding='utf-8') as file:
+            text = file.read()
+    else:
+        logger.warning("Unsupported file type: %s", filepath)
+        return {"content": "", "source": filepath}
+
+    return {"content": text, "source": filepath}
+
+def load_documents(directory):
+    logger.debug("Loading documents from directory: %s", directory)
+    filepaths = [os.path.join(directory, filename) for filename in os.listdir(directory) if filename.endswith('.pdf') or filename.endswith('.txt')]
+
+    documents = []
+    with ThreadPoolExecutor() as executor:
+        documents = list(executor.map(load_single_document, filepaths))
+
+    logger.debug("Loaded %d documents", len(documents))
+    return documents
+
|
48 |
+
|
49 |
+
documents = []
|
50 |
+
with ThreadPoolExecutor() as executor:
|
51 |
+
documents = list(executor.map(load_single_document, filepaths))
|
52 |
+
|
53 |
+
logger.debug("Loaded %d documents", len(documents))
|
54 |
+
return documents
|
55 |
+
|
56 |
+
def prepare_documents(documents):
|
57 |
+
logger.debug("Preparing documents for embedding.")
|
58 |
+
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
59 |
+
texts = text_splitter.create_documents([doc["content"] for doc in documents], metadatas=[{"source": os.path.basename(doc["source"])} for doc in documents])
|
60 |
+
|
61 |
+
embeddings = HuggingFaceEmbeddings()
|
62 |
+
db = FAISS.from_documents(texts, embeddings)
|
63 |
+
logger.debug("Documents prepared and indexed.")
|
64 |
+
return db
|
65 |
+
|
66 |
+
def clarify_ollama(question):
|
67 |
+
max_retries = 3
|
68 |
+
for attempt in range(max_retries):
|
69 |
+
try:
|
70 |
+
response = ollama.chat(model='gemma2:2b', messages=[
|
71 |
+
{
|
72 |
+
'role': 'system',
|
73 |
+
'content': 'Sei un assistente che deve essere sicuro del topic della domanda. Chiedi se la domanda si riferisce agli osservatori "Blockchain", "Payment" oppure "Metaverse"'
|
74 |
+
},
|
75 |
+
{
|
76 |
+
'role': 'user',
|
77 |
+
'content': f"Domanda: {question}"
|
78 |
+
}
|
79 |
+
])
|
80 |
+
return response['message']['content']
|
81 |
+
except Exception as e:
|
82 |
+
logger.error("Errore nella comunicazione con Ollama: %s", e)
|
83 |
+
if attempt < max_retries - 1:
|
84 |
+
logger.debug("Tentativo di riavvio di Ollama...")
|
85 |
+
subprocess.run(["ollama", "serve"], shell=True)
|
86 |
+
time.sleep(5)
|
87 |
+
else:
|
88 |
+
return "Mi dispiace, c'è un problema di comunicazione con il modello. Per favore, verifica che Ollama sia in esecuzione."
|
89 |
+
|
90 |
+
|
91 |
+
def query_ollama(question, context, sources):
|
92 |
+
max_retries = 3
|
93 |
+
for attempt in range(max_retries):
|
94 |
+
try:
|
95 |
+
response = ollama.chat(model='gemma2:2b', messages=[
|
96 |
+
{
|
97 |
+
'role': 'system',
|
98 |
+
'content': 'Sei un assistente che risponde in italiano alle domande basandosi solo sulle informazioni fornite. Cita le fonti quando possibile. Se non trovi informazioni, rispondi "Su questo al momento non posso risponderti. Puoi chiedere maggiori informazioni all\'ufficio di riferimento." e rammenta il topic delle fonti spiegando perché le informazioni richieste non sono disponibili.'
|
99 |
+
},
|
100 |
+
{
|
101 |
+
'role': 'user',
|
102 |
+
'content': f"Contesto: {context}\n\nFonti: {sources}\n\nDomanda: {question}"
|
103 |
+
}
|
104 |
+
])
|
105 |
+
return response['message']['content']
|
106 |
+
except Exception as e:
|
107 |
+
logger.error("Errore nella comunicazione con Ollama: %s", e)
|
108 |
+
if attempt < max_retries - 1:
|
109 |
+
logger.debug("Tentativo di riavvio di Ollama...")
|
110 |
+
subprocess.run(["ollama", "serve"], shell=True)
|
111 |
+
time.sleep(5)
|
112 |
+
else:
|
113 |
+
return "Mi dispiace, c'è un problema di comunicazione con il modello. Per favore, verifica che Ollama sia in esecuzione."
|
114 |
+
|
115 |
+
|
116 |
+
def get_answer(question, db):
|
117 |
+
start_time = time.time()
|
118 |
+
|
119 |
+
docs = db.similarity_search(question, k=3)
|
120 |
+
context = " ".join([doc.page_content for doc in docs])
|
121 |
+
sources = ", ".join(set([doc.metadata['source'] for doc in docs]))
|
122 |
+
|
123 |
+
answer = query_ollama(question, context, sources)
|
124 |
+
end_time = time.time()
|
125 |
+
logger.debug("Similarity search and response received in %.2f seconds.", end_time - start_time)
|
126 |
+
|
127 |
+
return answer, sources
|
128 |
+
|
129 |
+
|
130 |
+
def get_clarification_answer(question):
|
131 |
+
start_time = time.time()
|
132 |
+
|
133 |
+
clarify_answer = clarify_ollama(question)
|
134 |
+
|
135 |
+
end_time = time.time()
|
136 |
+
logger.debug("Clarification response received in %.2f seconds.", end_time - start_time)
|
137 |
+
|
138 |
+
return clarify_answer
|
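Taken together, app.py uses backend.py as load → index → query. A short usage sketch of that flow (the question string is illustrative; the directory must exist with fetched PDFs):

```python
from backend import load_documents, prepare_documents, get_answer

docs = load_documents("documents/blockchain")   # read PDFs/TXTs in parallel
db = prepare_documents(docs)                    # chunk, embed, index with FAISS
answer, sources = get_answer("Cos'è la blockchain?", db)
print(answer)
print("Fonti:", sources)
```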
documents/.DS_Store
ADDED
Binary file (6.15 kB)
documents/blockchain/.DS_Store
ADDED
Binary file (6.15 kB)
documents/blockchain/Blockchain.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21003d77565c7df6ee06ed107db50c514a480a383d5d23e85f102cc0de9f635c
+size 8470233
documents/metaverso/.DS_Store
ADDED
Binary file (6.15 kB)
documents/metaverso/ConvMetaverse.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45b06295c2b2fec0e2dd1be285b042191307745d2d4844f272e31f7692689e9c
+size 5075895
documents/payment/.DS_Store
ADDED
Binary file (6.15 kB)
documents/payment/Payments.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62d3979f90f1d4c1078615190c6988001325990a2b85033874cde23cab6c166c
+size 6399467
requirements.txt
ADDED
@@ -0,0 +1,10 @@
+streamlit
+pypdf
+langchain
+faiss-cpu
+transformers
+ollama
+python-dotenv
+cryptography
+sentence-transformers
+langchain-community
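requirements.txt pulls in both `langchain` and `langchain-community`; in newer LangChain releases the vector store and embedding classes imported in backend.py live in the community package, so the imports may need to read as follows (a sketch, depending on the installed versions):

```python
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
```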