Trabis committed on
Commit
a34fa54
·
verified ·
1 Parent(s): 215cb23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +260 -0
app.py CHANGED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_index.core import Settings, VectorStoreIndex, StorageContext, load_index_from_storage
2
+ from llama_index.core.embeddings import BaseEmbedding
3
+ from llama_index.llms.mistralai import MistralAI
4
+ from llama_index.core.node_parser import SentenceSplitter
5
+ from llama_index.core import SimpleDirectoryReader
6
+ from llama_index.core import PromptTemplate
7
+ # from pydantic import PrivateAttr
8
+ # import requests
9
+ from typing import List, Optional, Union
10
+ # from llama_index.core.embeddings.utils import BaseEmbedding
11
+ from llama_index.embeddings.huggingface import HuggingFaceInferenceAPIEmbedding
12
+ # from llama_index.embeddings.huggingface import HuggingFaceEmbedding
13
+ import streamlit as st
14
+ import pickle
15
+
16
+
17
# `os` is not imported by the file's import block, so it is imported here,
# next to its only use; without it this statement raises NameError on import.
import os

# Mistral API key taken from the `mistral_api_key` environment variable.
# `os.getenv` returns None when the variable is not set.
mistral_api_key = os.getenv("mistral_api_key")
18
+
19
class QASystem:
    """Question answering over a persisted LlamaIndex vector index.

    Construction creates a Hugging Face Inference API embedding client and a
    Mistral LLM client, installs both on the global llama_index ``Settings``
    object, and loads the index persisted under ``storage_dir``.
    """

    def __init__(self,
                 mistral_api_key: str = mistral_api_key,
                 data_dir: str = "./data",
                 storage_dir: str = "./index_llama_136_multilingual-e5-large",
                 model_temperature: float = 0.002):
        """Set up the embedding model, the LLM and the index.

        Args:
            mistral_api_key: API key handed to the ``MistralAI`` client.
            data_dir: Directory read by ``create_index`` when (re)building.
            storage_dir: Directory the index is persisted to / loaded from.
            model_temperature: Sampling temperature handed to the LLM.
        """
        self.data_dir = data_dir
        self.storage_dir = storage_dir

        # Embeddings are computed remotely through the inference API.
        self.embedding_model = HuggingFaceInferenceAPIEmbedding(
            model_name="intfloat/multilingual-e5-large",
        )

        llm_options = {
            "model": "mistral-large-latest",
            "api_key": mistral_api_key,
            "temperature": model_temperature,
            "max_tokens": 1024,
        }
        self.llm = MistralAI(**llm_options)

        self._configure_settings()

        # The index is only loaded here, never built; call create_index()
        # separately to (re)generate the persisted index.
        self.index = self.load_index()

    def _configure_settings(self):
        """Install this instance's LLM and embedding model on ``Settings``."""
        Settings.llm, Settings.embed_model = self.llm, self.embedding_model

    def create_index(self):
        """Build a vector index from ``data_dir`` and persist it.

        Returns:
            The newly built ``VectorStoreIndex``.
        """
        print("creating index")
        loaded_docs = SimpleDirectoryReader(self.data_dir).load_data()
        chunks = SentenceSplitter(
            chunk_size=206,
            chunk_overlap=0,
        ).get_nodes_from_documents(loaded_docs, show_progress=True)

        built_index = VectorStoreIndex(chunks, show_progress=True)
        built_index.storage_context.persist(self.storage_dir)
        return built_index

    def load_index(self):
        """Load and return the index persisted under ``storage_dir``."""
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=self.storage_dir),
            embed_model=self.embedding_model,
        )

    def create_query_engine(self):
        """Return a streaming query engine that uses the Arabic QA prompt."""
        # Arabic prompt: answer from the supplied context only, and say
        # "I don't know" instead of inventing an answer.
        qa_prompt_text = (
            "\n"
            "استخدم المعلومات التالية للإجابة على السؤال في النهاية. إذا لم تكن تعرف الإجابة، فقل فقط أنك لا تعرف، لا تحاول اختلاق إجابة.\n"
            "\n"
            "{context_str}\n"
            "السؤال: {query_str}\n"
            "الإجابة بالعربية:\n"
        )

        engine_options = {
            "similarity_top_k": 10,
            "streaming": True,
            "text_qa_template": PromptTemplate(template=qa_prompt_text),
            # Other modes tried here: tree_summarize, simple_summarize.
            "response_mode": "compact",
            "embed_model": self.embedding_model,
        }
        return self.index.as_query_engine(**engine_options)

    def query(self, question: str):
        """Run ``question`` through a freshly created query engine.

        Returns:
            The response object produced by the engine's ``query`` call.
        """
        return self.create_query_engine().query(question)
95
+
96
+
97
# Singleton-style accessor to avoid initialising the QA system multiple times.
@st.cache_resource
def get_qa_system():
    """Return a ``QASystem`` built with its default arguments."""
    qa_system = QASystem()
    return qa_system
101
+
102
# Page-wide CSS: loads the Noto Kufi Arabic font and forces right-to-left
# layout on the Streamlit widgets used by this app.
_RTL_ARABIC_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Noto+Kufi+Arabic:wght@100;200;300;400;500;600;700;800;900&display=swap');

/* Application globale de la police */
* {
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}

body {
    text-align: right;
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style pour tous les textes */
p, div, span, button, input, label, h1, h2, h3, h4, h5, h6 {
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Titre principal avec taille réduite */
h1 {
    font-size: 1.2em !important;
    margin-bottom: 0.5em !important;
    text-align: center;
    padding: 0.3px;
}

.css-1h9b9rq.e1tzin5v0 {
    direction: rtl;
    text-align: right;
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style pour l'expandeur */
.streamlit-expanderContent, div[data-testid="stExpander"] {
    direction: rtl !important;
    text-align: right !important;
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style pour les boutons de l'expandeur */
button[kind="secondary"] {
    direction: rtl !important;
    text-align: right !important;
    width: 100% !important;
    font-family: 'Noto Kufi Arabic', sans-serif !important;
    font-weight: 30 !important;
}

/* Style pour tous les éléments de texte */
p, div {
    direction: rtl !important;
    text-align: right !important;
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style pour les bullet points */
ul, li {
    direction: rtl !important;
    text-align: right !important;
    margin-right: 20px !important;
    margin-left: 0 !important;
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}

.stTextInput, .stButton {
    margin-left: auto;
    margin-right: 0;
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}

.stTextInput {
    width: 100% !important;
    direction: rtl;
    text-align: right;
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Force RTL sur tous les conteneurs */
.element-container, .stMarkdown {
    direction: rtl !important;
    text-align: right !important;
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style spécifique pour l'expandeur des sources */
.css-1fcdlhc, .css-1629p8f {
    direction: rtl !important;
    text-align: right !important;
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}

/* Style pour le titre */
.stTitle {
    font-family: 'Noto Kufi Arabic', sans-serif !important;
    font-weight: 700 !important;
}

/* Style pour les boutons */
.stButton>button {
    font-family: 'Noto Kufi Arabic', sans-serif !important;
    font-weight: 500 !important;
}

/* Style pour les champs de texte */
.stTextInput>div>div>input {
    font-family: 'Noto Kufi Arabic', sans-serif !important;
}
</style>
"""

# Right-aligned "Sources" heading shown above the sources expander.
_SOURCES_HEADER_HTML = """
<div style="direction: rtl !important; text-align: right !important; font-family: 'Noto Kufi Arabic', sans-serif !important;">
    <div class="streamlit-expanderHeader">
        المصادر
    </div>
</div>
"""

# One retrieved passage; both placeholders receive HTML-escaped text.
_SOURCE_ITEM_HTML = """
<div style="direction: rtl !important; text-align: right !important; font-family: 'Noto Kufi Arabic', sans-serif !important;">
    <p style="text-align: right !important;">مصادر الجواب : {file_name}</p>
    <p style="text-align: right !important;">Extrait: {excerpt}</p>
</div>
"""


def _render_sources(source_nodes):
    """Show the file name and text of every retrieved node in an expander."""
    # Imported locally because the file's import block does not provide it.
    import html

    st.markdown(_SOURCES_HEADER_HTML, unsafe_allow_html=True)
    with st.expander(""):
        for node in source_nodes:
            # Document-derived text goes into raw HTML, so escape it: a stray
            # "<" or "&" in a source file must not be interpreted as markup.
            file_name = html.escape(str(node.metadata.get('file_name', 'Unknown')))
            excerpt = html.escape(node.text)
            st.markdown(
                _SOURCE_ITEM_HTML.format(file_name=file_name, excerpt=excerpt),
                unsafe_allow_html=True,
            )


def main():
    """Render the Streamlit page: question box, streamed answer, sources.

    The answer is streamed token by token into a placeholder; once streaming
    finishes, the retrieved source passages are listed in an expander. Any
    exception raised while querying is reported with ``st.error``.
    """
    st.markdown(_RTL_ARABIC_CSS, unsafe_allow_html=True)

    st.title("هذا تطبيق للاجابة عن الاسئلة المتعلقة بالقانون المغربي ")
    st.title("حاليا يضم 136 قانونا")

    qa_system = get_qa_system()

    question = st.text_input("اطرح سؤالك :",placeholder=None)

    if not st.button("بحث"):
        return

    if not question or not question.strip():
        # Previously an empty submission was ignored without any feedback.
        st.warning("المرجو إدخال سؤال قبل البحث.")
        return

    response_container = st.empty()
    try:
        query_engine = qa_system.create_query_engine()
        response = query_engine.query(question)

        # Re-render the accumulated text on every token so the answer appears
        # progressively in the same placeholder.
        # NOTE(review): the model output is rendered with
        # unsafe_allow_html=True, as before; confirm this is intended.
        answer = ""
        st.session_state.current_response = answer
        for token in response.response_gen:
            answer += token
            response_container.markdown(answer, unsafe_allow_html=True)
        st.session_state.current_response = answer

        source_nodes = getattr(response, 'source_nodes', None)
        if source_nodes is not None:
            _render_sources(source_nodes)
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user.
        st.error(f"Une erreur s'est produite : {e}")


if __name__ == "__main__":
    main()