Anuttama Chakraborty committed on
Commit 80a3a2e · 1 Parent(s): 4376d5f

gradio integration first commit

RagWithConfidenceScore.py ADDED
@@ -0,0 +1,397 @@
+ import re
+ import torch
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.document_loaders import DirectoryLoader
+ from langchain_community.vectorstores import FAISS
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from sentence_transformers import CrossEncoder
+ from transformers import AutoTokenizer, pipeline
+
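+ # Financial-statements RAG pipeline: FAISS retrieval, cross-encoder
+ # reranking, TinyLlama generation, and a heuristic confidence score.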
+ class RagWithScore:
+     def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2",
+                  cross_encoder_name="cross-encoder/ms-marco-MiniLM-L-6-v2",
+                  llm_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+                  documents_dir="financial_docs"):
+         """
+         Initialize the financial RAG system.
+
+         Args:
+             model_name: The embedding model name
+             cross_encoder_name: The cross-encoder model for reranking
+             llm_name: Small language model for generation
+             documents_dir: Directory containing financial statements
+         """
+         # Initialize embedding model
+         self.embedding_model = HuggingFaceEmbeddings(model_name=model_name)
+
+         # Initialize cross-encoder for reranking
+         self.cross_encoder = CrossEncoder(cross_encoder_name)
+
+         # Initialize small language model
+         self.tokenizer = AutoTokenizer.from_pretrained(llm_name)
+         self.llm = pipeline(
+             "text-generation",
+             model=llm_name,
+             tokenizer=self.tokenizer,
+             torch_dtype=torch.bfloat16,
+             device_map="auto",
+             max_new_tokens=512,
+             do_sample=False  # Greedy decoding for deterministic outputs
+                              # (temperature/top_p are ignored when sampling is off)
+         )
+
+         # Store paths
+         self.documents_dir = documents_dir
+         self.vector_store = None
+
+         # Input guardrail rules - sensitive terms/patterns
+         self.guardrail_patterns = [
+             "insider trading",
+             "stock manipulation",
+             "fraud detection",
+             "embezzlement",
+             "money laundering",
+             "tax evasion",
+             "illegal activities"
+         ]
+
+         # Confidence score thresholds
+         self.confidence_thresholds = {
+             "high": 0.75,
+             "medium": 0.5,
+             "low": 0.3
+         }
+
+     def load_and_process_documents(self):
+         """Load, split, and index the financial documents."""
+         print("Processing documents to create FAISS index...")
+         loader = DirectoryLoader(self.documents_dir, glob="**/*.pdf")
+         documents = loader.load()
+
+         # Split documents into overlapping chunks
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=1000, chunk_overlap=200
+         )
+         chunks = text_splitter.split_documents(documents)
+         print(f"Created {len(chunks)} chunks")
+
+         # Create and save FAISS vector store
+         self.vector_store = FAISS.from_documents(chunks, embedding=self.embedding_model)
+         self.vector_store.save_local("faiss_index")
+
+         return self.vector_store
+
+     def load_or_create_vector_store(self):
+         """Load the saved FAISS index if present, otherwise build a new one."""
+         try:
+             print("Loading existing FAISS index...")
+             # allow_dangerous_deserialization is required by recent langchain
+             # versions when loading a locally pickled index you created yourself
+             self.vector_store = FAISS.load_local(
+                 "faiss_index", self.embedding_model,
+                 allow_dangerous_deserialization=True
+             )
+             print("FAISS index loaded successfully")
+         except Exception as e:
+             print(f"Error loading FAISS index: {e}")
+             print("Creating new FAISS index...")
+             self.load_and_process_documents()
+             print("New FAISS index created and saved")
+
+     def generate_answer(self, query, context):
+         """Generate an answer based on the retrieved context."""
+         prompt_template = """
+         You are a financial analyst assistant that helps answer questions about company financial statements.
+         Use the provided financial information to give accurate and helpful answers.
+
+         Context information from financial statements:
+         {context}
+
+         Question: {query}
+
+         If the question requires a numerical calculation, show the step-by-step logic behind the calculation before providing the final answer.
+         Ensure that your approach remains consistent in methodology across different queries.
+
+         Provide a concise answer based only on the given context. You don't have to provide sources. If you don't have enough information to answer, say so clearly.
+
+         Answer:
+         """
+
+         # Format context into a single string
+         context_str = "\n\n".join([doc.page_content for doc in context])
+
+         # Format prompt
+         prompt = prompt_template.format(context=context_str, query=query)
+
+         # Generate answer using the small language model
+         response = self.llm(prompt)[0]['generated_text']
+
+         # Extract only the generated answer part (after the prompt)
+         answer = response[len(prompt):].strip()
+
+         return answer
+
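+     # Confidence blends two signals with fixed weights. Worked example:
+     # consistency 0.8 and LLM self-confidence 0.6 give
+     # 0.6*0.8 + 0.4*0.6 = 0.72, which maps to "medium" (below the 0.75
+     # "high" threshold defined in __init__).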
+     def calculate_confidence_score(self, query, retrieved_docs, answer):
+         """A simpler confidence score calculation focused on consistency and LLM confidence"""
+         # Get LLM confidence
+         llm_confidence = self._get_llm_confidence(query, retrieved_docs, answer)
+
+         # Get consistency score
+         consistency_score = self._measure_answer_consistency(query, retrieved_docs, answer)
+
+         # Simple weighted average
+         confidence_score = (0.6 * consistency_score) + (0.4 * llm_confidence)
+
+         return confidence_score
+
+     # An earlier, multi-factor version kept for reference:
+     # def calculate_confidence_score(self, query, retrieved_docs, answer):
+     #     """
+     #     Calculate confidence score based on multiple factors:
+     #     1. Retrieval similarity scores
+     #     2. Reranking scores
+     #     3. Answer consistency across documents
+     #     4. LLM-based confidence estimation
+     #
+     #     Returns:
+     #         float: Confidence score between 0 and 1
+     #     """
+     #     # 1. Calculate average similarity/relevance score from retrieved documents
+     #     retrieval_scores = []
+     #     for doc in retrieved_docs:
+     #         if hasattr(doc, 'metadata') and 'score' in doc.metadata:
+     #             retrieval_scores.append(doc.metadata['score'])
+     #
+     #     avg_retrieval_score = sum(retrieval_scores) / len(retrieval_scores) if retrieval_scores else 0.0
+     #     print(f"avg_retrieval_score : {avg_retrieval_score}")
+     #
+     #     # 2. Use cross-encoder scores as a stronger relevance signal
+     #     pairs = [(query, doc.page_content) for doc in retrieved_docs]
+     #     cross_encoder_scores = self.cross_encoder.predict(pairs) if pairs else []
+     #     avg_cross_encoder_score = sum(cross_encoder_scores) / len(cross_encoder_scores) if len(cross_encoder_scores) > 0 else 0.0
+     #     print(f"avg_cross_encoder_score : {avg_cross_encoder_score}")
+     #
+     #     # 3. Measure answer consistency across documents
+     #     consistency_score = self._measure_answer_consistency(query, retrieved_docs, answer)
+     #     print(f"consistency_score : {consistency_score}")
+     #
+     #     # 4. LLM-based confidence estimation
+     #     llm_confidence = self._get_llm_confidence(query, retrieved_docs, answer)
+     #     print(f"llm_confidence : {llm_confidence}")
+     #
+     #     # Combine all factors (adjust weights based on what's most important
+     #     # for your use case); the cross-encoder term is currently disabled,
+     #     # so the result is renormalized over the three remaining weights
+     #     weights = {
+     #         'retrieval': 0.2,
+     #         'cross_encoder': 0.3,
+     #         'consistency': 0.3,
+     #         'llm_confidence': 0.2
+     #     }
+     #
+     #     confidence_score = (
+     #         weights['retrieval'] * avg_retrieval_score +
+     #         # weights['cross_encoder'] * avg_cross_encoder_score +
+     #         weights['consistency'] * consistency_score +
+     #         weights['llm_confidence'] * llm_confidence
+     #     )
+     #
+     #     # Normalize to the 0-1 range
+     #     total_weight = weights['retrieval'] + weights['consistency'] + weights['llm_confidence']
+     #     confidence_score = confidence_score / total_weight
+     #     # confidence_score = min(max(confidence_score, 0.0), 1.0)
+     #
+     #     return confidence_score
+
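+     # NOTE: consistency measurement below issues one LLM call per retrieved
+     # document, so it dominates end-to-end latency as rerank_top_k grows.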
+     def _measure_answer_consistency(self, query, retrieved_docs, final_answer):
+         """
+         Measure consistency of the answer across multiple documents.
+
+         Returns:
+             float: Consistency score between 0 and 1
+         """
+         if len(retrieved_docs) <= 1:
+             return 0.5  # Neutral score if we only have one document
+
+         # Generate an individual answer from each document
+         individual_answers = []
+         for doc in retrieved_docs:
+             prompt = f"""
+             Based only on this specific financial information:
+             {doc.page_content}
+
+             Question: {query}
+
+             Provide a very brief answer (1-2 sentences maximum):
+             """
+             response = self.llm(prompt, max_new_tokens=100)[0]['generated_text']
+             answer = response[len(prompt):].strip()
+             individual_answers.append(answer)
+
+         # Embed the individual answers together with the final answer
+         answer_embeddings = self.embedding_model.embed_documents(individual_answers + [final_answer])
+
+         final_answer_embedding = answer_embeddings[-1]  # Last embedding is the final answer
+         individual_embeddings = answer_embeddings[:-1]  # All other embeddings
+
+         # Cosine similarity of each individual answer to the final answer
+         similarities = []
+         for emb in individual_embeddings:
+             dot_product = sum(a * b for a, b in zip(emb, final_answer_embedding))
+             magnitude_a = sum(a * a for a in emb) ** 0.5
+             magnitude_b = sum(b * b for b in final_answer_embedding) ** 0.5
+             similarity = dot_product / (magnitude_a * magnitude_b) if magnitude_a * magnitude_b > 0 else 0
+             similarities.append(similarity)
+
+         # Average similarity represents consistency
+         return sum(similarities) / len(similarities) if similarities else 0.5
+
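+     # NOTE: self-reported confidence from a small chat model is a noisy
+     # signal; the blend above accordingly gives it the smaller weight (0.4).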
+     def _get_llm_confidence(self, query, retrieved_docs, answer):
+         """
+         Ask the LLM to estimate its own confidence in the answer.
+
+         Returns:
+             float: LLM confidence score between 0 and 1
+         """
+         # Concatenate retrieved contexts (top 2 only, to stay within the token limit)
+         context = "\n\n".join([doc.page_content for doc in retrieved_docs[:2]])
+
+         # Create confidence estimation prompt
+         prompt = f"""
+         You are evaluating the confidence level of an answer to a financial question.
+
+         Question: {query}
+
+         Retrieved Context:
+         {context}
+
+         Generated Answer: {answer}
+
+         On a scale of 1 to 10, how confident are you that the answer is correct and supported by the retrieved context?
+         Provide only a number between 1 and 10, with 10 being extremely confident and 1 being not confident at all.
+         """
+
+         # Get confidence score from LLM
+         response = self.llm(prompt, max_new_tokens=10)[0]['generated_text']
+
+         # Search only the generated continuation: the full response echoes the
+         # prompt, which itself contains numbers ("1 to 10") that would match
+         generated = response[len(prompt):]
+
+         # Extract a numeric confidence score
+         try:
+             numbers = re.findall(r'\b([1-9]|10)\b', generated)
+             if numbers:
+                 llm_confidence = float(numbers[0]) / 10.0  # Normalize to 0-1
+             else:
+                 llm_confidence = 0.5  # Default neutral value
+         except Exception:
+             llm_confidence = 0.5  # Default neutral value
+
+         return llm_confidence
+
+     def get_confidence_level(self, confidence_score):
+         """
+         Convert a numerical confidence score to a level.
+
+         Args:
+             confidence_score: Float between 0 and 1
+
+         Returns:
+             str: Confidence level ("high", "medium", "low", or "very low")
+         """
+         if confidence_score >= self.confidence_thresholds["high"]:
+             return "high"
+         elif confidence_score >= self.confidence_thresholds["medium"]:
+             return "medium"
+         elif confidence_score >= self.confidence_thresholds["low"]:
+             return "low"
+         else:
+             return "very low"
+
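+     # NOTE: the guardrail below is plain substring matching; paraphrases or
+     # misspellings of the blocked terms will not be caught.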
+     def apply_input_guardrail(self, query):
+         """Check whether the query violates input guardrails."""
+         query_lower = query.lower()
+
+         for pattern in self.guardrail_patterns:
+             if pattern in query_lower:
+                 return True, f"I cannot process queries about {pattern}. Please reformulate your question."
+
+         return False, ""
+
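+     # Two-stage retrieval: the bi-encoder index (FAISS) fetches top_k
+     # candidates cheaply, then the cross-encoder rescores each (query, chunk)
+     # pair, which is slower per pair but a much stronger ranker.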
+     def retrieve_with_reranking(self, query, top_k=10, rerank_top_k=5):
+         """Retrieve relevant chunks and rerank them with the cross-encoder."""
+         # Initial retrieval using embedding similarity
+         docs_and_scores = self.vector_store.similarity_search_with_score(query, k=top_k)
+
+         # FAISS returns a distance (lower = more similar), so sort ascending;
+         # this also keeps the candidate order deterministic
+         docs_and_scores.sort(key=lambda x: x[1])
+
+         # Prepare (query, chunk) pairs for the cross-encoder
+         pairs = [(query, doc.page_content) for doc, _ in docs_and_scores]
+
+         # Get relevance scores from the cross-encoder
+         scores = self.cross_encoder.predict(pairs)
+
+         # Sort by cross-encoder score, best first
+         reranked_results = sorted(zip(docs_and_scores, scores), key=lambda x: x[1], reverse=True)
+
+         # Return the top reranked documents
+         return [doc for (doc, _), _ in reranked_results[:rerank_top_k]]
+
+     def is_financial_question(self, query):
+         """Keyword-based check that the query is in the financial domain."""
+         financial_keywords = [
+             "finance", "financial", "revenue", "profit", "loss", "EBITDA", "cash flow",
+             "balance sheet", "income statement", "stock", "bond", "investment", "risk",
+             "interest rate", "inflation", "debt", "equity", "valuation", "dividend",
+             "market", "economy", "GDP", "currency", "exchange rate", "tax", "audit",
+             "compliance", "regulation", "SEC", "earnings", "capital", "asset", "liability"
+         ]
+         query_lower = query.lower()
+         # Lowercase each keyword too, so uppercase entries like "EBITDA"
+         # can actually match the lowercased query
+         return any(keyword.lower() in query_lower for keyword in financial_keywords)
+
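+     # Pipeline: domain check -> input guardrail -> retrieve + rerank ->
+     # generate -> confidence scoring.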
+     def answer_question(self, query):
+         """End-to-end pipeline to answer a question with a confidence score."""
+         if not self.is_financial_question(query):
+             return {
+                 "answer": "This question is outside the scope of financial data. Please ask a question related to finance.",
+                 "source_documents": [],
+                 "blocked": True,
+                 "confidence_score": 0,
+                 "confidence_level": "none"
+             }
+
+         # Apply input guardrail
+         blocked, message = self.apply_input_guardrail(query)
+         if blocked:
+             return {
+                 "answer": message,
+                 "source_documents": [],
+                 "blocked": True,
+                 "confidence_score": 0,
+                 "confidence_level": "none"
+             }
+
+         # Retrieve and rerank relevant contexts
+         reranked_docs = self.retrieve_with_reranking(query)
+
+         # Generate answer
+         answer = self.generate_answer(query, reranked_docs)
+
+         # Calculate confidence score and level
+         confidence_score = self.calculate_confidence_score(query, reranked_docs, answer)
+         confidence_level = self.get_confidence_level(confidence_score)
+
+         return {
+             "answer": answer,
+             "source_documents": reranked_docs,
+             "blocked": False,
+             "confidence_score": confidence_score,
+             "confidence_level": confidence_level
+         }
app.py ADDED
@@ -0,0 +1,51 @@
+ import gradio as gr
+ from RagWithConfidenceScore import RagWithScore
+
+ # Initialize the RAG system
+ rag_system = RagWithScore()
+
+ # Load the saved FAISS index if present, otherwise build it from the documents
+ rag_system.load_or_create_vector_store()
+
+ # Define the function to handle user queries
+ def answer_financial_query(query):
+     # Use the RAG system to answer the question
+     result = rag_system.answer_question(query)
+
+     # Format the output
+     answer = result["answer"]
+     confidence_score = result["confidence_score"]
+     confidence_level = result["confidence_level"]
+     sources = "\n\n".join([doc.page_content for doc in result["source_documents"]])
+
+     # One value per output component, in order
+     return answer, f"{confidence_score:.2f}", confidence_level, sources
+
+ # Create a Gradio interface
+ interface = gr.Interface(
+     fn=answer_financial_query,  # Function to call
+     inputs=gr.Textbox(lines=2, placeholder="Enter your financial query here..."),  # Input component
+     outputs=[  # Output components (must match the four values returned above)
+         gr.Textbox(label="Answer"),
+         gr.Textbox(label="Confidence Score"),
+         gr.Textbox(label="Confidence Level"),
+         gr.Textbox(label="Source Documents", lines=10)
+     ],
+     title="Financial RAG System",
+     description="Ask questions about financial data and get answers powered by Retrieval-Augmented Generation (RAG).",
+     examples=[
+         ["What is the current revenue growth rate?"],
+         ["Explain the concept of EBITDA."],
+         ["What are the key financial risks mentioned in the report?"]
+     ]
+ )
+
+ # Launch the interface
+ interface.launch()
financial_docs/JPMorgan Chase Bank, N.A. 2024 Annual Consolidated Financial Statements - Final.pdf ADDED
The diff for this file is too large to render.
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ torch
+ pandas
+ transformers
+ sentence-transformers
+ scikit-learn
+ langchain
+ langchain-community
+ faiss-cpu
+ accelerate>=0.26.0
+ unstructured[pdf]
+ langchain-huggingface
+ gradio