Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import html
import os
import pickle
from typing import List, Optional, Union

import streamlit as st
from llama_index.core import PromptTemplate
from llama_index.core import Settings, VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index.core import SimpleDirectoryReader
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceInferenceAPIEmbedding
from llama_index.llms.mistralai import MistralAI
# from pydantic import PrivateAttr
# import requests
# from llama_index.core.embeddings.utils import BaseEmbedding
# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
15 |
+
|
16 |
+
|
17 |
+
# Mistral API key, read once at import time from the "mistral_api_key"
# environment variable; it is None when that variable is not set.
# Requires `import os` at the top of the file.
mistral_api_key = os.getenv("mistral_api_key")
|
18 |
+
|
19 |
+
class QASystem:
    """Question answering over a persisted LlamaIndex vector index.

    Construction creates a Hugging Face Inference API embedding model and a
    Mistral LLM, registers both on the global LlamaIndex ``Settings``, and
    loads the index stored under ``storage_dir``.
    """

    def __init__(self,
                 mistral_api_key: str = mistral_api_key,
                 data_dir: str = "./data",
                 storage_dir: str = "./index_llama_136_multilingual-e5-large",
                 model_temperature: float = 0.002):
        """Set up the models and load the persisted index.

        Args:
            mistral_api_key: API key handed to the Mistral client.
            data_dir: Directory read by ``create_index``.
            storage_dir: Directory the index is persisted to / loaded from.
            model_temperature: Sampling temperature for the LLM.
        """
        self.data_dir, self.storage_dir = data_dir, storage_dir

        # Embeddings are computed remotely through the HF Inference API.
        self.embedding_model = HuggingFaceInferenceAPIEmbedding(
            model_name="intfloat/multilingual-e5-large",
        )

        llm_options = {
            "model": "mistral-large-latest",
            "api_key": mistral_api_key,
            "temperature": model_temperature,
            "max_tokens": 1024,
        }
        self.llm = MistralAI(**llm_options)

        # Register the models globally before the index is loaded.
        self._configure_settings()

        # The index is only loaded here; create_index() is not called.
        self.index = self.load_index()

    def _configure_settings(self):
        """Install this instance's LLM and embedding model on ``Settings``."""
        Settings.llm = self.llm
        Settings.embed_model = self.embedding_model

    def create_index(self):
        """Build a vector index from ``data_dir`` and persist it to ``storage_dir``.

        Returns:
            The newly built ``VectorStoreIndex``.
        """
        print("creating index")
        docs = SimpleDirectoryReader(self.data_dir).load_data()
        chunks = SentenceSplitter(chunk_size=206, chunk_overlap=0).get_nodes_from_documents(
            docs, show_progress=True
        )

        built_index = VectorStoreIndex(chunks, show_progress=True)
        built_index.storage_context.persist(self.storage_dir)
        return built_index

    def load_index(self):
        """Load and return the index persisted under ``storage_dir``."""
        persisted = StorageContext.from_defaults(persist_dir=self.storage_dir)
        loaded_index = load_index_from_storage(persisted, embed_model=self.embedding_model)
        return loaded_index

    def create_query_engine(self):
        """Return a streaming query engine that uses the Arabic QA prompt."""
        qa_prompt = PromptTemplate(template="""
استخدم المعلومات التالية للإجابة على السؤال في النهاية. إذا لم تكن تعرف الإجابة، فقل فقط أنك لا تعرف، لا تحاول اختلاق إجابة.

{context_str}
السؤال: {query_str}
الإجابة بالعربية:
""")

        engine_options = {
            "similarity_top_k": 10,
            "streaming": True,
            "text_qa_template": qa_prompt,
            # Other response modes: tree_summarize, simple_summarize.
            "response_mode": "compact",
            "embed_model": self.embedding_model,
        }
        return self.index.as_query_engine(**engine_options)

    def query(self, question: str):
        """Run ``question`` through a fresh query engine and return its response."""
        return self.create_query_engine().query(question)
|
95 |
+
|
96 |
+
|
97 |
+
# Singleton accessor: avoids re-initializing the QA system multiple times.
@st.cache_resource
def get_qa_system():
    """Return the ``QASystem`` instance cached by ``st.cache_resource``."""
    qa_system = QASystem()
    return qa_system
|
101 |
+
|
102 |
+
def _inject_rtl_styles():
    """Inject the page-wide CSS (Noto Kufi Arabic font, right-to-left layout)."""
    st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Noto+Kufi+Arabic:wght@100;200;300;400;500;600;700;800;900&display=swap');

/* Application globale de la police */
* {
font-family: 'Noto Kufi Arabic', sans-serif !important;
}

body {
text-align: right;
font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style pour tous les textes */
p, div, span, button, input, label, h1, h2, h3, h4, h5, h6 {
font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Titre principal avec taille réduite */
h1 {
font-size: 1.2em !important;
margin-bottom: 0.5em !important;
text-align: center;
padding: 0.3px;
}

.css-1h9b9rq.e1tzin5v0 {
direction: rtl;
text-align: right;
font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style pour l'expandeur */
.streamlit-expanderContent, div[data-testid="stExpander"] {
direction: rtl !important;
text-align: right !important;
font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style pour les boutons de l'expandeur */
button[kind="secondary"] {
direction: rtl !important;
text-align: right !important;
width: 100% !important;
font-family: 'Noto Kufi Arabic', sans-serif !important;
font-weight: 30 !important;
}

/* Style pour tous les éléments de texte */
p, div {
direction: rtl !important;
text-align: right !important;
font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style pour les bullet points */
ul, li {
direction: rtl !important;
text-align: right !important;
margin-right: 20px !important;
margin-left: 0 !important;
font-family: 'Noto Kufi Arabic', sans-serif !important;
}

.stTextInput, .stButton {
margin-left: auto;
margin-right: 0;
font-family: 'Noto Kufi Arabic', sans-serif !important;
}

.stTextInput {
width: 100% !important;
direction: rtl;
text-align: right;
font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Force RTL sur tous les conteneurs */
.element-container, .stMarkdown {
direction: rtl !important;
text-align: right !important;
font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style spécifique pour l'expandeur des sources */
.css-1fcdlhc, .css-1629p8f {
direction: rtl !important;
text-align: right !important;
font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style pour le titre */
.stTitle {
font-family: 'Noto Kufi Arabic', sans-serif !important;
font-weight: 700 !important;
}

/* Style pour les boutons */
.stButton>button {
font-family: 'Noto Kufi Arabic', sans-serif !important;
font-weight: 500 !important;
}

/* Style pour les champs de texte */
.stTextInput>div>div>input {
font-family: 'Noto Kufi Arabic', sans-serif !important;
}
</style>
""", unsafe_allow_html=True)


def _stream_answer(response, placeholder):
    """Render the streamed answer into ``placeholder`` token by token.

    The text received so far is accumulated in
    ``st.session_state.current_response``; the caller must reset that value
    to an empty string before calling this helper.
    """
    for token in response.response_gen:
        st.session_state.current_response += token
        # NOTE(review): the model output is rendered with raw HTML enabled,
        # as before; confirm this is intended.
        placeholder.markdown(st.session_state.current_response, unsafe_allow_html=True)


def _render_sources(source_nodes):
    """Show the source passages in an expander below a "المصادر" heading.

    File names and passage text are HTML-escaped because they are inserted
    into markup rendered with ``unsafe_allow_html=True``.
    """
    st.markdown("""
<div style="direction: rtl !important; text-align: right !important; font-family: 'Noto Kufi Arabic', sans-serif !important;">
<div class="streamlit-expanderHeader">
المصادر
</div>
</div>
""", unsafe_allow_html=True)
    with st.expander(""):
        for node in source_nodes:
            file_name = html.escape(str(node.metadata.get('file_name', 'Unknown')))
            excerpt = html.escape(node.text)
            st.markdown(f"""
<div style="direction: rtl !important; text-align: right !important; font-family: 'Noto Kufi Arabic', sans-serif !important;">
<p style="text-align: right !important;">مصادر الجواب : {file_name}</p>
<p style="text-align: right !important;">Extrait: {excerpt}</p>
</div>
""", unsafe_allow_html=True)


def main():
    """Render the Streamlit page: styles, titles, question box, answer, sources.

    When the search button is pressed with a non-blank question, the question
    is sent to the cached QA system, the answer is streamed into the page and
    the source passages are listed. Any exception raised while querying or
    rendering is shown with ``st.error`` instead of propagating.
    """
    _inject_rtl_styles()

    st.title("هذا تطبيق للاجابة عن الاسئلة المتعلقة بالقانون المغربي ")
    st.title("حاليا يضم 136 قانونا")

    qa_system = get_qa_system()

    question = st.text_input("اطرح سؤالك :", placeholder=None)

    # st.button must run on every script execution so the button is drawn.
    if not st.button("بحث"):
        return

    # Ignore empty and whitespace-only questions.
    question = (question or "").strip()
    if not question:
        return

    response_container = st.empty()
    try:
        st.session_state.current_response = ""
        response = qa_system.query(question)
        _stream_answer(response, response_container)

        # Skip the sources section when the response has no source nodes.
        sources = getattr(response, 'source_nodes', None)
        if sources:
            _render_sources(sources)
    except Exception as e:
        # Top-level UI boundary: report the failure to the user.
        st.error(f"Une erreur s'est produite : {e}")
|
258 |
+
|
259 |
+
# Script entry point: run the page only when executed as the main module.
if __name__ == "__main__":
    main()
|