shivXy committed on
Commit 4956fb0 · 1 Parent(s): 59ee619

adding rest of ragas metrics

Files changed (1)
app.py +60 -14
app.py CHANGED
@@ -12,7 +12,7 @@ from qdrant_client import QdrantClient
 from langchain_openai import OpenAIEmbeddings
 import os
 from ragas import evaluate
-from ragas.metrics import answer_relevancy
+from ragas.metrics import answer_relevancy, faithfulness, context_precision, context_recall
 from langchain_core.documents import Document
 import json
 import numpy as np
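For reference, the three metrics added to this import complement answer_relevancy and target different stages of the RAG pipeline. The annotated import below is an illustrative sketch (not part of app.py); the stated inputs follow the RAGAS documentation for the 0.1 releases and may differ slightly in other versions.

# Illustrative only, not part of app.py: what each imported metric scores.
# Exact input requirements depend on the installed ragas version.
from ragas.metrics import (
    answer_relevancy,   # is the generated answer on-topic for the question (question + answer)
    faithfulness,       # is the answer grounded in the retrieved contexts (question + answer + contexts)
    context_precision,  # are the most relevant retrieved chunks ranked first (question + contexts + reference answer)
    context_recall,     # do the retrieved contexts cover the reference answer (question + contexts + reference answer)
)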
@@ -26,12 +26,12 @@ load_dotenv()
 # Load OpenAI Model
 llm = ChatOpenAI(model="gpt-4o-mini")
 qd_api_key = os.getenv("QDRANT_CLOUD_API_KEY")
-EVALUATION_MODE = os.getenv("EVALUATION_MODE", "false").lower() == "false"
+EVALUATION_MODE = os.getenv("EVALUATION_MODE", "false").lower() == "true"
 
 
 embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
 
-# ✅ Initialize Qdrant Client
+# Initialize Qdrant Client
 qd_client = QdrantClient(
     "https://40c458f2-24a9-4153-b15b-0addf6a6bbcf.us-east-1-0.aws.cloud.qdrant.io:6333",
     api_key=qd_api_key
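The EVALUATION_MODE change fixes an inverted flag: comparing against "false" made the flag True whenever the variable was unset or explicitly set to "false", and False when set to "true". Comparing against "true" gives the intended opt-in behaviour. A minimal, hypothetical helper (not in app.py) that makes this kind of flag parsing harder to get wrong:

import os

def env_flag(name: str, default: bool = False) -> bool:
    """Interpret common truthy strings ("1", "true", "yes", "on") as True."""
    value = os.getenv(name)
    if value is None:
        return default
    return value.strip().lower() in {"1", "true", "yes", "on"}

EVALUATION_MODE = env_flag("EVALUATION_MODE")  # False unless explicitly enabled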
@@ -65,31 +65,74 @@ def search(query_vector, top_k=1) -> list:
 
     return return_hits
 
+def evaluate_ragas_metrics(question: str, model_answer: str, retrieved_docs: list):
+    """Evaluate faithfulness, context precision, and context recall using RAGAS."""
+
+    # Extract document content from metadata
+    ragas_docs = [
+        Document(page_content=hit["metadata"].get("content", ""))
+        for hit in retrieved_docs if "content" in hit["metadata"] and hit["metadata"]["content"]
+    ]
+
+    if not ragas_docs:
+        print("⚠️ No relevant documents to evaluate.")
+        return {"faithfulness": 0, "context_precision": 0, "context_recall": 0}
+
+    # Construct required input
+    queries = [question]
+    generated_answers = [model_answer]
+    contexts = [[doc.page_content for doc in ragas_docs]]
+
+    # Run evaluation
+    scores = evaluate(
+        queries=queries,
+        contexts=contexts,
+        generated_answers=generated_answers,
+        metrics=[faithfulness, context_precision, context_recall]
+    )
+
+    print("📊 Debug: RAGAS Metrics Output ->", scores)
+
+    # Extract individual scores
+    faithfulness_score = scores.iloc[0]["faithfulness"]
+    context_precision_score = scores.iloc[0]["context_precision"]
+    context_recall_score = scores.iloc[0]["context_recall"]
+
+    print(f"📊 Faithfulness Score: {faithfulness_score}")
+    print(f"📊 Context Precision Score: {context_precision_score}")
+    print(f"📊 Context Recall Score: {context_recall_score}")
+
+    return {
+        "faithfulness": faithfulness_score,
+        "context_precision": context_precision_score,
+        "context_recall": context_recall_score
+    }
+
 def evaluate_retrieved_docs(question: str, retrieved_docs: list):
     """Evaluate the retrieved documents using RAGAS metrics."""
 
-    # ✅ Extract document content from metadata
+    # Extract document content from metadata
     ragas_docs = [
         Document(page_content=hit["metadata"].get("content", ""))
         for hit in retrieved_docs
         if "content" in hit["metadata"] and hit["metadata"]["content"]
     ]
 
-    # 🚨 Debugging Output
+    # Debugging Output
     print("🔍 Debug: RAGAS Docs Format:", ragas_docs)
 
     if not ragas_docs:
         print("⚠️ No relevant documents to evaluate.")
         return 0  # Return low score if no documents found
 
-    # ✅ Construct required input
+    # Construct required input
     queries = [question]
     contexts = [[doc.page_content for doc in ragas_docs]]
 
     print("✅ Debug: Queries ->", queries)
     print("✅ Debug: Contexts ->", contexts)
 
-    # ✅ Run evaluation
+    # Run evaluation
     scores = evaluate(
         queries=queries,
         contexts=contexts,
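One caveat that applies to both evaluation helpers in this hunk: the keyword-argument form queries=/contexts=/generated_answers= does not match the evaluate() signature in recent ragas releases (0.1 and later), which expect a Hugging Face datasets.Dataset with question/answer/contexts columns (plus a ground-truth column that context_precision and context_recall need) and return a result convertible to a DataFrame. If these calls raise a TypeError at runtime, the sketch below shows the rough shape under those assumptions; the ground-truth text is a placeholder the app would have to supply, so this is not a drop-in replacement.

from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, context_precision, context_recall

def evaluate_ragas_metrics(question: str, model_answer: str, retrieved_docs: list) -> dict:
    # Keep only hits that actually carry text content.
    contexts = [
        hit["metadata"]["content"]
        for hit in retrieved_docs
        if hit["metadata"].get("content")
    ]
    if not contexts:
        return {"faithfulness": 0, "context_precision": 0, "context_recall": 0}

    # Column names follow ragas 0.1.x; older releases used "ground_truths" (a list) instead.
    dataset = Dataset.from_dict({
        "question": [question],
        "answer": [model_answer],
        "contexts": [contexts],
        "ground_truth": ["<reference answer goes here>"],  # placeholder; required by precision/recall
    })
    result = evaluate(dataset, metrics=[faithfulness, context_precision, context_recall])
    row = result.to_pandas().iloc[0]
    return {
        "faithfulness": row["faithfulness"],
        "context_precision": row["context_precision"],
        "context_recall": row["context_recall"],
    }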
@@ -191,21 +234,24 @@ def research_node(state) -> dict:
     query_vector = embedding_model.embed_query(question)
 
     # Query Qdrant with the vector
-    relevant_docs = search(query_vector=query_vector, top_k=1)
+    relevant_docs = search(query_vector=query_vector, top_k=1)
 
-    if EVALUATION_MODE:
-        # Evaluate retrieved documents using RAGAS
-        relevance_score = evaluate_retrieved_docs(question, relevant_docs)
-        print(f"📊 [Evaluation Mode] RAGAS Score: {relevance_score}")
+    model_answer = "No answer generated yet"
 
-    if relevant_docs[0]['score'] > 0.5:  # Threshold for good retrieval quality this will be the cosine similarity score
+    if relevant_docs[0]['score'] > hit_score:  # Threshold for good retrieval quality this will be the cosine similarity score
         # Found relevant document → Summarize it
         document_name = relevant_docs[0]["metadata"].get("document_name", "No source available.")
         document_text = get_document_by_name(document_name)
-
         messages = summary_prompt.format_messages(document=document_text)
         response = llm.invoke(messages)
 
+        if EVALUATION_MODE:
+            # Evaluate retrieved documents using RAGAS
+            relevance_score = evaluate_retrieved_docs(question, relevant_docs)
+            print(f"📊 [Evaluation Mode] RAGAS Score: {relevance_score}")
+            ragas_scores = evaluate_ragas_metrics(question, model_answer, relevant_docs)
+            print(f"📊 [evaluate_ragas_metrics] RAGAS Scores: {ragas_scores}")
+
         return {**state, "messages": state["messages"] + [HumanMessage(content=response.content)], "_next": "post_processing"}
 
     else:
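Two follow-ups on this hunk: hit_score is not defined in the lines shown, so it presumably lives elsewhere in app.py (or still needs to be added), and model_answer is still the placeholder string when evaluate_ragas_metrics runs, so faithfulness would be scored against "No answer generated yet" rather than the generated summary. Since the evaluation block already sits after the LLM call, passing the real response would be a small change, sketched here against the hunk above (variable names are those already used in app.py):

        messages = summary_prompt.format_messages(document=document_text)
        response = llm.invoke(messages)

        if EVALUATION_MODE:
            relevance_score = evaluate_retrieved_docs(question, relevant_docs)
            print(f"📊 [Evaluation Mode] RAGAS Score: {relevance_score}")
            # Score the generated summary itself rather than the placeholder string.
            ragas_scores = evaluate_ragas_metrics(question, response.content, relevant_docs)
            print(f"📊 [evaluate_ragas_metrics] RAGAS Scores: {ragas_scores}")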
 