harshnarayan12 committed
Commit 72f9b35 · verified · 1 Parent(s): 073bb25

Upload 71 files

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +2 -0
  2. agents/__init__.py +0 -0
  3. agents/__pycache__/__init__.cpython-311.pyc +0 -0
  4. agents/__pycache__/__init__.cpython-312.pyc +0 -0
  5. agents/__pycache__/agent_decision.cpython-311.pyc +0 -0
  6. agents/__pycache__/shared_rag.cpython-311.pyc +0 -0
  7. agents/__pycache__/shared_rag.cpython-312.pyc +0 -0
  8. agents/agent_decision.py +336 -0
  9. agents/rag_agent/UNUSED_evaluation.py +177 -0
  10. agents/rag_agent/__init__.py +340 -0
  11. agents/rag_agent/__pycache__/__init__.cpython-311.pyc +0 -0
  12. agents/rag_agent/__pycache__/data_ingestion.cpython-311.pyc +0 -0
  13. agents/rag_agent/__pycache__/document_processor.cpython-311.pyc +0 -0
  14. agents/rag_agent/__pycache__/query_processor.cpython-311.pyc +0 -0
  15. agents/rag_agent/__pycache__/reranker.cpython-311.pyc +0 -0
  16. agents/rag_agent/__pycache__/response_generator.cpython-311.pyc +0 -0
  17. agents/rag_agent/__pycache__/vector_store.cpython-311.pyc +0 -0
  18. agents/rag_agent/data_ingestion.py +300 -0
  19. agents/rag_agent/document_processor.py +507 -0
  20. agents/rag_agent/query_processor.py +142 -0
  21. agents/rag_agent/reranker.py +69 -0
  22. agents/rag_agent/response_generator.py +326 -0
  23. agents/rag_agent/vector_store.py +298 -0
  24. agents/shared_rag.py +151 -0
  25. agents/web_search_processor_agent/__init__.py +25 -0
  26. agents/web_search_processor_agent/__pycache__/__init__.cpython-311.pyc +0 -0
  27. agents/web_search_processor_agent/__pycache__/geminai_search.cpython-311.pyc +0 -0
  28. agents/web_search_processor_agent/__pycache__/pubmed_search.cpython-311.pyc +0 -0
  29. agents/web_search_processor_agent/__pycache__/tavily_search.cpython-311.pyc +0 -0
  30. agents/web_search_processor_agent/__pycache__/web_search_agent.cpython-311.pyc +0 -0
  31. agents/web_search_processor_agent/__pycache__/web_search_processor.cpython-311.pyc +0 -0
  32. agents/web_search_processor_agent/geminai_search.py +162 -0
  33. agents/web_search_processor_agent/web_search_agent.py +125 -0
  34. agents/web_search_processor_agent/web_search_processor.py +116 -0
  35. chat_sessions/chat_Harsh Narayan_20250802_165208.json +24 -0
  36. chat_sessions/chat_Harsh Narayan_20250804_163618.json +27 -0
  37. chat_sessions/chat_string_string.json +8 -0
  38. config/__init__.py +0 -0
  39. config/__pycache__/__init__.cpython-311.pyc +0 -0
  40. config/__pycache__/config.cpython-311.pyc +0 -0
  41. config/agents.yaml +42 -0
  42. config/config.py +164 -0
  43. config/rag.yaml +26 -0
  44. config/tasks.yaml +79 -0
  45. crew_ai/README.md +164 -0
  46. crew_ai/__init__.py +0 -0
  47. crew_ai/__pycache__/__init__.cpython-311.pyc +0 -0
  48. crew_ai/__pycache__/chatbot.cpython-311.pyc +0 -0
  49. crew_ai/__pycache__/config.cpython-311.pyc +0 -0
  50. crew_ai/__pycache__/data_retriever_util.cpython-311.pyc +0 -0
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 static/treeman.png filter=lfs diff=lfs merge=lfs -text
+knowledge/deathless.pdf filter=lfs diff=lfs merge=lfs -text
+knowledge/know-see.pdf filter=lfs diff=lfs merge=lfs -text
agents/__init__.py ADDED
File without changes
agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (154 Bytes)
agents/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (142 Bytes)
agents/__pycache__/agent_decision.cpython-311.pyc ADDED
Binary file (15.7 kB)
agents/__pycache__/shared_rag.cpython-311.pyc ADDED
Binary file (9.25 kB)
agents/__pycache__/shared_rag.cpython-312.pyc ADDED
Binary file (8.42 kB)
agents/agent_decision.py ADDED
@@ -0,0 +1,336 @@
"""
Agent Decision System for Multi-Agent Mental Health Chatbot
Orchestrates RAG, CrewAI, and Web Search agents using LangGraph.
"""
from dotenv import load_dotenv
import json
from typing import TypedDict, List, Any, Optional, Union, Dict
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.graph import StateGraph, END
from langgraph.checkpoint.memory import MemorySaver
from difflib import SequenceMatcher

from agents.web_search_processor_agent import WebSearchProcessorAgent
from config.config import Config
from .shared_rag import shared_rag_instance

# Import CrewAI components
try:
    from crew_ai.chatbot import (
        run_crisis_check,
        run_condition_classification,
        run_user_profile_retrieval,
        run_recommendations
    )
    CREWAI_AVAILABLE = True
    print("✅ CrewAI components imported successfully")
except ImportError as e:
    print(f"⚠️ CrewAI components not available: {e}")
    CREWAI_AVAILABLE = False

load_dotenv()

# Configuration
config = Config()
memory = MemorySaver()

# Simple query cache
QUERY_CACHE = {}
CACHE_SIMILARITY_THRESHOLD = 0.85

class AgentState(TypedDict):
    """State maintained across the workflow."""
    messages: List[Any]
    agent_name: Optional[str]
    current_input: Optional[Union[str, Dict]]
    output: Optional[str]
    retrieval_confidence: float
    user_context: Optional[Dict]
    # CrewAI specific state
    crisis_detected: Optional[bool]
    condition_classified: Optional[str]
    user_profile: Optional[Dict]
    crewai_used: Optional[bool]

def get_cached_response(query: str, user_context: dict) -> Optional[dict]:
    """Check for a cached similar response."""
    query_lower = query.lower()

    for cached_query, cached_data in QUERY_CACHE.items():
        similarity = SequenceMatcher(None, query_lower, cached_query.lower()).ratio()
        if similarity >= CACHE_SIMILARITY_THRESHOLD:
            if (cached_data['emotion'] == user_context.get('emotion') and
                    cached_data['status'] == user_context.get('mental_health_status')):
                print(f"[CACHE] Found similar response (similarity: {similarity:.2f})")
                return cached_data['response']
    return None

def cache_response(query: str, user_context: dict, response: dict):
    """Cache a response for future use."""
    if len(QUERY_CACHE) > 100:
        oldest_key = next(iter(QUERY_CACHE))
        del QUERY_CACHE[oldest_key]

    QUERY_CACHE[query] = {
        'emotion': user_context.get('emotion'),
        'status': user_context.get('mental_health_status'),
        'response': response
    }

def create_agent_graph():
    """Create and configure the LangGraph for agent orchestration."""

    def rag_agent_processor(state):
        """Process the query using the RAG pipeline."""
        try:
            user_query = state["current_input"]
            user_context = state.get("user_context", {})

            print(f"[RAG_AGENT] Processing: {user_query[:50]}...")
            print(f"[RAG_AGENT] Context - Emotion: {user_context.get('emotion', 'neutral')}, Status: {user_context.get('mental_health_status', 'Unknown')}")

            # Check cache first
            cached_response = get_cached_response(user_query, user_context)
            if cached_response:
                print("[RAG_AGENT] Using cached response")
                return {
                    **state,
                    "output": cached_response.get('response', ''),
                    "agent_name": "RAG_AGENT_CACHED",
                    "retrieval_confidence": cached_response.get('confidence', 0.8),
                    "crewai_used": False
                }

            # Get RAG instance and process query
            rag = shared_rag_instance.get_rag()
            result = rag.process_query(
                query=user_query,
                user_emotion=user_context.get('emotion', 'neutral'),
                mental_health_status=user_context.get('mental_health_status', 'Unknown'),
                user_context=user_context
            )

            confidence = result.get("confidence", 0.0)
            response_text = result.get("response", "I'm here to help you with your mental health concerns.")

            print(f"[RAG_AGENT] Confidence: {confidence}")
            print(f"[RAG_AGENT] Response: {response_text[:100]}...")

            # Cache the response
            cache_response(user_query, user_context, result)

            return {
                **state,
                "output": response_text,
                "agent_name": "RAG_AGENT",
                "retrieval_confidence": confidence,
                "crewai_used": False
            }

        except Exception as e:
            print(f"[RAG_AGENT] Error: {e}")
            return {
                **state,
                "output": "I understand you're reaching out for support. While I'm having some technical difficulties, I want you to know that your feelings are valid and there are people who can help.",
                "agent_name": "RAG_AGENT_FALLBACK",
                "retrieval_confidence": 0.0,
                "crewai_used": False
            }

    def crewai_pipeline_processor(state: AgentState) -> AgentState:
        """Process the query using the CrewAI pipeline when RAG confidence is low."""
        print("[CREWAI_PIPELINE] Processing with CrewAI agents...")

        try:
            if not CREWAI_AVAILABLE:
                print("[CREWAI_PIPELINE] Not available, falling back to web search")
                return fallback_to_web_search(state)

            user_query = str(state["current_input"])
            user_context = state.get("user_context", {})

            print(f"[CREWAI_PIPELINE] Query: {user_query[:50]}...")

            # Use RAG's CrewAI integration if available
            rag = shared_rag_instance.get_rag()
            if hasattr(rag, 'process_query_with_crewai') and rag.crewai_enabled:
                print("[CREWAI_PIPELINE] Using RAG's CrewAI integration")

                result = rag.process_query_with_crewai(user_query, user_context)

                return {
                    **state,
                    "output": result.get("response", "I'm here to support you."),
                    "agent_name": result.get("agent", "CREWAI_ENHANCED_SYSTEM"),
                    "crisis_detected": result.get("is_crisis", False),
                    "condition_classified": result.get("condition", "general"),
                    "crewai_used": True,
                    "retrieval_confidence": result.get("confidence", 0.85)
                }
            else:
                print("[CREWAI_PIPELINE] Using direct CrewAI functions")

                # Direct CrewAI processing
                user_id = user_context.get('user_id', 'anon_user')

                # Crisis check
                crisis_result = run_crisis_check(user_query)
                is_crisis = crisis_result.get("is_crisis", False)

                if is_crisis:
                    crisis_rec = run_recommendations(
                        user_query,
                        user_profile=json.dumps(user_context),
                        condition="Crisis",
                        answers="{}",
                        interpretation="N/A",
                        is_crisis="true"
                    )

                    return {
                        **state,
                        "output": crisis_rec.get("recommendation",
                            "🆘 Please reach out for immediate help. In Bhutan: Emergency Services (112), National Mental Health Program (1717)"),
                        "agent_name": "CREWAI_CRISIS_AGENT",
                        "crisis_detected": True,
                        "crewai_used": True,
                        "retrieval_confidence": 0.95
                    }

                # Normal processing
                try:
                    user_profile = run_user_profile_retrieval(user_query, user_id)
                except Exception:
                    user_profile = {"id": user_id, "name": user_context.get('name', 'User')}

                try:
                    condition_result = run_condition_classification(user_query, json.dumps(user_profile))
                    condition = condition_result.get("condition", "general").lower()
                except Exception:
                    condition = "general"

                final_rec = run_recommendations(
                    user_query,
                    json.dumps(user_profile),
                    condition,
                    json.dumps(user_context.get('assessment_answers', {})),
                    user_context.get('mental_health_status', 'Unknown'),
                    is_crisis="false"
                )

                return {
                    **state,
                    "output": final_rec.get("recommendation",
                        f"Thank you for sharing your concerns. I'm here to support you with {condition} related issues."),
                    "agent_name": "CREWAI_ENHANCED_SYSTEM",
                    "condition_classified": condition,
                    "user_profile": user_profile,
                    "crewai_used": True,
                    "retrieval_confidence": 0.85
                }

        except Exception as e:
            print(f"[CREWAI_PIPELINE] Error: {e}")
            return fallback_to_web_search(state)

    def fallback_to_web_search(state: AgentState) -> AgentState:
        """Fall back to the web search processor."""
        print("[WEB_SEARCH] Processing with web search agent...")

        try:
            query = str(state["current_input"])
            user_context = state.get("user_context", {})

            # Use WebSearchProcessorAgent
            web_agent = WebSearchProcessorAgent()
            response = web_agent.process_web_search_results(
                query=query,
                user_context=user_context
            )

            return {
                **state,
                "output": response,
                "agent_name": "WEB_SEARCH_PROCESSOR_AGENT",
                "crewai_used": False
            }

        except Exception as e:
            print(f"[WEB_SEARCH] Error: {e}")
            return {
                **state,
                "output": "I'm here to support you, though I'm having some technical difficulties. Please know that help is available. For immediate support in Bhutan, contact the National Mental Health Program at 1717.",
                "agent_name": "WEB_SEARCH_FALLBACK",
                "crewai_used": False
            }

    def confidence_based_routing(state: AgentState) -> str:
        """Route based on the RAG confidence score."""
        min_confidence = getattr(config.rag, 'min_retrieval_confidence', 0.7)
        confidence = state.get("retrieval_confidence", 0.0)

        print(f"[ROUTING] Confidence: {confidence:.2f}, Threshold: {min_confidence}")

        if confidence < min_confidence:
            if CREWAI_AVAILABLE:
                print("[ROUTING] Low confidence, routing to CrewAI...")
                return "CREWAI_PIPELINE"
            else:
                print("[ROUTING] Low confidence, routing to Web Search...")
                return "WEB_SEARCH_PROCESSOR_AGENT"

        print("[ROUTING] High confidence, finalizing...")
        return "finalize_response"

    def finalize_response(state: AgentState) -> AgentState:
        """Finalize the response."""
        output = state.get("output", "")

        if output:
            messages = state.get("messages", [])
            messages.append(AIMessage(content=str(output)))

            agent_name = state.get("agent_name", "Unknown")
            crewai_used = state.get("crewai_used", False)

            print(f"[FINALIZE] Response from {agent_name}, CrewAI: {crewai_used}")

            return {
                **state,
                "messages": messages
            }

        return state

    # Create workflow graph
    workflow = StateGraph(AgentState)

    # Add nodes
    workflow.add_node("RAG_AGENT", rag_agent_processor)
    workflow.add_node("CREWAI_PIPELINE", crewai_pipeline_processor)
    workflow.add_node("WEB_SEARCH_PROCESSOR_AGENT", fallback_to_web_search)
    workflow.add_node("finalize_response", finalize_response)

    # Define edges
    workflow.set_entry_point("RAG_AGENT")
    workflow.add_conditional_edges("RAG_AGENT", confidence_based_routing)
    workflow.add_edge("CREWAI_PIPELINE", "finalize_response")
    workflow.add_edge("WEB_SEARCH_PROCESSOR_AGENT", "finalize_response")
    workflow.add_edge("finalize_response", END)

    return workflow.compile(checkpointer=memory)

def init_agent_state() -> AgentState:
    """Initialize agent state with default values."""
    return {
        "messages": [],
        "agent_name": None,
        "current_input": None,
        "output": None,
        "retrieval_confidence": 0.0,
        "user_context": None,
        "crisis_detected": None,
        "condition_classified": None,
        "user_profile": None,
        "crewai_used": None
    }
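
For orientation, a minimal driver for this module might look like the sketch below. It is illustrative only: the sample query, user context, and thread_id are assumptions, and the surrounding app is expected to build the real user context. The thread_id is needed because the graph is compiled with a MemorySaver checkpointer.

    # Illustrative sketch: run one turn through the compiled graph.
    from langchain_core.messages import HumanMessage
    from agents.agent_decision import create_agent_graph, init_agent_state

    graph = create_agent_graph()

    state = init_agent_state()
    state["current_input"] = "I feel anxious before exams, what can I do?"
    state["user_context"] = {"emotion": "anxious", "mental_health_status": "Mild Anxiety"}
    state["messages"] = [HumanMessage(content=state["current_input"])]

    # The MemorySaver checkpointer keys conversation state by thread_id.
    result = graph.invoke(state, config={"configurable": {"thread_id": "demo-session"}})
    print(result["agent_name"], result["output"][:120])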
agents/rag_agent/UNUSED_evaluation.py ADDED
@@ -0,0 +1,177 @@
import logging
from typing import List, Dict, Any, Optional
import re
import json
from collections import Counter

class RAGEvaluator:
    """
    Evaluates the performance of the RAG system and tracks metrics.
    """
    def __init__(self, config):
        """
        Initialize the evaluator.

        Args:
            config: Configuration object
        """
        self.logger = logging.getLogger(__name__)
        self.metrics = {
            "queries_processed": 0,
            "successful_retrievals": 0,
            "failed_retrievals": 0,
            "avg_retrieval_time": 0,
            "avg_response_time": 0,
            "avg_confidence_score": 0,
            "feedback_scores": []
        }
        self.save_path = getattr(config.rag, "metrics_save_path", "rag_metrics.json")

    def log_retrieval(self, query: str, retrieved_docs: List[Dict[str, Any]],
                      retrieval_time: float, success: bool = True):
        """Log metrics for a retrieval operation."""
        self.metrics["queries_processed"] += 1

        if success and retrieved_docs:
            self.metrics["successful_retrievals"] += 1
            prev_avg = self.metrics["avg_retrieval_time"]
            prev_count = self.metrics["queries_processed"] - 1
            self.metrics["avg_retrieval_time"] = (prev_avg * prev_count + retrieval_time) / self.metrics["queries_processed"]
            scores = [doc.get("score", 0) for doc in retrieved_docs]
            avg_score = sum(scores) / len(scores) if scores else 0
            self.logger.info(f"Query: '{query}' | Docs: {len(retrieved_docs)} | Avg Score: {avg_score:.4f}")
        else:
            self.metrics["failed_retrievals"] += 1
            self.logger.warning(f"Failed retrieval for query: '{query}'")

    def log_response(self, query: str, response: Dict[str, Any], response_time: float):
        """Log metrics for a response generation operation."""
        prev_avg = self.metrics["avg_response_time"]
        prev_count = self.metrics["queries_processed"] - 1
        self.metrics["avg_response_time"] = (prev_avg * prev_count + response_time) / self.metrics["queries_processed"]

        confidence = response.get("confidence", 0)
        prev_avg = self.metrics["avg_confidence_score"]
        self.metrics["avg_confidence_score"] = (prev_avg * prev_count + confidence) / self.metrics["queries_processed"]

        self.logger.info(f"Generated response for query: '{query}' | Confidence: {confidence:.4f}")

    def log_user_feedback(self, query: str, response: Dict[str, Any], feedback_score: int):
        """Log user feedback on responses."""
        self.metrics["feedback_scores"].append({
            "query": query,
            "response": response.get("response", ""),
            "score": feedback_score
        })
        self.logger.info(f"Received feedback for query: '{query}' | Score: {feedback_score}")

    def evaluate_response_quality(self, query: str, retrieved_docs: List[Dict[str, Any]],
                                  response: Dict[str, Any]) -> Dict[str, Any]:
        """Evaluate the quality of the response based on retrieved documents."""
        retrieval_precision = self._calculate_precision(query, retrieved_docs)
        answer_relevance = self._calculate_relevance(query, response, retrieved_docs)
        metrics = {
            "retrieval_precision": retrieval_precision,
            "answer_relevance": answer_relevance,
            "hallucination_risk": self._estimate_hallucination_risk(response, retrieved_docs),
            "answer_completeness": self._calculate_completeness(response, retrieved_docs)
        }
        return metrics

    def _calculate_precision(self, query: str, docs: List[Dict[str, Any]]) -> float:
        """Calculate precision of retrieved documents (simplified)."""
        if not docs:
            return 0.0
        scores = [doc.get("score", 0) for doc in docs]
        return sum(scores) / len(scores) if scores else 0.0

    def _calculate_relevance(self, query: str, response: Dict[str, Any], docs: List[Dict[str, Any]]) -> float:
        """Calculate relevance of the response to the query."""
        if not docs or not response:
            return 0.0
        response_text = response.get("response", "").lower()
        query_words = set(query.lower().split())
        stopwords = {"a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for", "with", "by"}
        query_words = query_words - stopwords
        word_count = sum(1 for word in query_words if word in response_text)
        return word_count / len(query_words) if query_words else 0.0

    def _estimate_hallucination_risk(self, response: Dict[str, Any], docs: List[Dict[str, Any]]) -> float:
        """Estimate risk of hallucination in the response."""
        if not docs or not response:
            return 1.0
        all_doc_content = " ".join([doc["content"].lower() for doc in docs])
        response_text = response.get("response", "").lower()
        factual_statements = re.findall(r'[^.!?]*[.!?]', response_text)
        unsupported = 0
        total = len(factual_statements)
        for statement in factual_statements:
            has_number = bool(re.search(r'\d+', statement))
            has_medical_term = bool(re.search(r'(?i)(disease|syndrome|treatment|medication|therapy|drug|dosage|diagnosis)', statement))
            if (has_number or has_medical_term) and not self._is_supported(statement, all_doc_content):
                unsupported += 1
        return unsupported / total if total > 0 else 0.5

    def _is_supported(self, statement: str, doc_content: str) -> bool:
        """Check if a statement is supported by document content."""
        keywords = statement.lower().split()
        keywords = [w for w in keywords if len(w) > 4]
        if not keywords:
            return True
        found = sum(1 for word in keywords if word in doc_content)
        return (found / len(keywords)) >= 0.6

    def _calculate_completeness(self, response: Dict[str, Any], docs: List[Dict[str, Any]]) -> float:
        """Calculate completeness of the response."""
        response_text = response.get("response", "")
        word_count = len(response_text.split())
        length_score = min(word_count / 150, 1.0)
        has_introduction = bool(re.search(r'^[A-Z][^.!?]{10,}[.!?]', response_text))
        has_conclusion = bool(re.search(r'(?i)(in conclusion|to summarize|overall|in summary)', response_text))
        structure_score = (has_introduction + has_conclusion) / 2
        return (length_score * 0.7) + (structure_score * 0.3)

    def save_metrics(self):
        """Save current metrics to disk."""
        try:
            with open(self.save_path, 'w') as f:
                json.dump(self.metrics, f, indent=2)
            self.logger.info(f"Metrics saved to {self.save_path}")
        except Exception as e:
            self.logger.error(f"Failed to save metrics: {e}")

    def get_metrics_summary(self) -> Dict[str, Any]:
        """Get a summary of current metrics."""
        retrieval_success_rate = 0
        if self.metrics["queries_processed"] > 0:
            retrieval_success_rate = self.metrics["successful_retrievals"] / self.metrics["queries_processed"]
        feedback_distribution = Counter(item["score"] for item in self.metrics["feedback_scores"])
        avg_feedback = sum(item["score"] for item in self.metrics["feedback_scores"]) / len(self.metrics["feedback_scores"]) if self.metrics["feedback_scores"] else 0
        return {
            "queries_processed": self.metrics["queries_processed"],
            "retrieval_success_rate": retrieval_success_rate,
            "avg_retrieval_time": self.metrics["avg_retrieval_time"],
            "avg_response_time": self.metrics["avg_response_time"],
            "avg_confidence_score": self.metrics["avg_confidence_score"],
            "feedback_distribution": feedback_distribution,
            "avg_feedback_score": avg_feedback
        }
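
A small usage sketch of how these logging methods fit together; it is not part of the commit, and the config stub, sample document, and timings are assumptions chosen only to satisfy the class's inputs.

    # Illustrative sketch: wire RAGEvaluator around one query.
    import time
    from types import SimpleNamespace
    from agents.rag_agent.UNUSED_evaluation import RAGEvaluator

    config = SimpleNamespace(rag=SimpleNamespace(metrics_save_path="rag_metrics.json"))
    evaluator = RAGEvaluator(config)

    start = time.time()
    docs = [{"content": "Mindfulness and CBT are common treatments for anxiety.", "score": 0.82}]
    evaluator.log_retrieval("how is anxiety treated", docs, retrieval_time=time.time() - start)

    response = {"response": "Common treatments include CBT and mindfulness.", "confidence": 0.74}
    evaluator.log_response("how is anxiety treated", response, response_time=0.9)
    print(evaluator.evaluate_response_quality("how is anxiety treated", docs, response))
    evaluator.save_metrics()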
agents/rag_agent/__init__.py ADDED
@@ -0,0 +1,340 @@
from pathlib import Path
from typing import Dict, Any, List, Optional
from sentence_transformers import SentenceTransformer
import logging
from config.config import Config

from .vector_store import QdrantRetriever
from .document_processor import MedicalDocumentProcessor
from .query_processor import QueryProcessor
from .reranker import Reranker
from .response_generator import ResponseGenerator
from .data_ingestion import MedicalDataIngestion
import json

from dotenv import load_dotenv
load_dotenv()


class MedicalRAG:
    """
    Medical Retrieval-Augmented Generation system that integrates all components.
    """
    def __init__(self, config: Config, llm, embedding_model):
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)

        self.config = config
        self.llm = llm
        self.embedding_model = embedding_model or SentenceTransformer("all-MiniLM-L6-v2")

        if not self.embedding_model:
            raise ValueError("Embedding model is required for document processing")

        # Get chunking parameters from config
        self.chunk_size = getattr(config.rag, 'chunk_size', 256)
        self.chunk_overlap = getattr(config.rag, 'chunk_overlap', 50)
        self.chunking_strategy = getattr(config.rag, 'chunking_strategy', 'hybrid')

        # Ensure processed docs directory exists
        self.processed_docs_dir = Path(getattr(config.rag, 'processed_docs_dir', 'processed_docs'))
        self.processed_docs_dir.mkdir(exist_ok=True)

        # Initialize CrewAI integration
        self.crewai_enabled = True
        try:
            self._initialize_crewai_agents()
            self.logger.info("✅ CrewAI agents initialized successfully")
        except Exception as e:
            self.logger.warning(f"⚠️ CrewAI agents not available: {e}")
            self.crewai_enabled = False

        # Initialize core components
        try:
            self.retriever = QdrantRetriever(config)
            self.document_processor = MedicalDocumentProcessor(config, self.embedding_model)
            self.query_processor = QueryProcessor(config, self.embedding_model)
            self.reranker = Reranker(config)
            self.response_generator = ResponseGenerator(config, llm)
            self.data_ingestion = MedicalDataIngestion()

            self.logger.info(f"✅ MedicalRAG initialized - Embedding dim: {getattr(config.rag, 'embedding_dim', 'unknown')}")

        except Exception as e:
            self.logger.error(f"❌ Error initializing MedicalRAG components: {e}")
            raise

    def _initialize_crewai_agents(self):
        """Initialize CrewAI agents from the crew_ai module."""
        from crew_ai.chatbot import (
            run_crisis_check,
            run_condition_classification,
            run_user_profile_retrieval,
            run_recommendations
        )

        # Store CrewAI functions
        self.run_crisis_check = run_crisis_check
        self.run_condition_classification = run_condition_classification
        self.run_user_profile_retrieval = run_user_profile_retrieval
        self.run_recommendations = run_recommendations

    def ingest_knowledge_folder(self, folder_path: str) -> Dict[str, Any]:
        """Ingest all documents from a knowledge folder."""
        folder = Path(folder_path)
        if not folder.exists():
            self.logger.error(f"Knowledge folder not found: {folder_path}")
            return {"error": f"Folder not found: {folder_path}"}

        ingestion_results = {
            "total_files": 0,
            "successful": 0,
            "failed": 0,
            "files": [],
            "total_chunks": 0
        }

        # Get all supported files
        supported_extensions = ['.txt', '.pdf', '.md', '.json', '.csv']
        files = []
        for ext in supported_extensions:
            files.extend(folder.glob(f'*{ext}'))

        ingestion_results["total_files"] = len(files)

        if not files:
            self.logger.warning(f"No supported files found in {folder_path}")
            return ingestion_results

        self.logger.info(f"Found {len(files)} files to ingest")

        for file_path in files:
            try:
                self.logger.info(f"Ingesting {file_path.name}...")

                # Load and process document
                documents = self.data_ingestion.load_document(str(file_path))
                if not documents:
                    self.logger.warning(f"No content extracted from {file_path.name}")
                    ingestion_results["failed"] += 1
                    continue

                # Create chunks
                chunks = self.document_processor.process_documents(documents)
                self.logger.info(f"Created {len(chunks)} chunks from {file_path.name}")

                # Store in vector database
                stored_count = self.retriever.add_documents(chunks)

                ingestion_results["successful"] += 1
                ingestion_results["total_chunks"] += len(chunks)
                ingestion_results["files"].append({
                    "name": file_path.name,
                    "status": "success",
                    "chunks": len(chunks),
                    "stored": stored_count
                })

            except Exception as e:
                self.logger.error(f"Error ingesting {file_path.name}: {str(e)}")
                ingestion_results["failed"] += 1
                ingestion_results["files"].append({
                    "name": file_path.name,
                    "status": "error",
                    "error": str(e)
                })

        self.logger.info(f"Ingestion complete: {ingestion_results['successful']}/{ingestion_results['total_files']} files processed")
        return ingestion_results

    def process_query(self, query: str, user_emotion: Optional[str] = None,
                      mental_health_status: Optional[str] = None,
                      user_context: Optional[Dict] = None, **kwargs) -> Dict[str, Any]:
        """Process a query using the RAG pipeline with emotion and health status awareness."""
        try:
            # Extract and validate parameters
            user_emotion = user_emotion or 'neutral'
            mental_health_status = mental_health_status or 'Unknown'
            user_context = user_context or {}

            self.logger.info(f"[RAG] Processing query: {query[:50]}...")
            self.logger.info(f"[RAG] User emotion: {user_emotion}, Status: {mental_health_status}")

            # Process query metadata (NOT a recursive call)
            query_metadata = self.query_processor.process_query(query)
            enhanced_query = query_metadata.get('expanded_query', query)

            print(f"[RAG] Enhanced query: {enhanced_query}")

            # Generate embedding and retrieve documents
            query_embedding = self.embedding_model.encode(enhanced_query)
            retrieved_docs = self.retriever.retrieve(
                query_embedding,
                top_k=getattr(self.config.rag, 'top_k', 5),
                metadata_filter=query_metadata.get('filters', {})
            )

            print(f"[RAG] Retrieved {len(retrieved_docs)} documents")

            # Debug first document
            if retrieved_docs:
                doc = retrieved_docs[0]
                if hasattr(doc, 'page_content'):
                    print(f"[RAG] Top doc: {doc.page_content[:100]}...")
                else:
                    print(f"[RAG] Top doc: {str(doc)[:100]}...")
            else:
                print("[RAG] ⚠️ No documents retrieved!")

            # Rerank if enabled
            if self.reranker and retrieved_docs:
                print("[RAG] Reranking documents...")
                reranked_docs = self.reranker.rerank(query, retrieved_docs)
            else:
                reranked_docs = retrieved_docs

            # Generate response
            response_data = self.response_generator.generate_response(
                query=query,
                retrieved_docs=reranked_docs,
                user_emotion=user_emotion,
                mental_health_status=mental_health_status,
                user_context=user_context
            )

            # Calculate final confidence
            confidence = response_data.get("confidence", 0.5)

            # Boost confidence for personal emotional queries
            if user_emotion not in ['neutral', 'neutral/unsure'] and any(
                phrase in query.lower() for phrase in ["i am", "i feel", "i'm", "my", "me"]
            ):
                confidence_boost = 0.1
                confidence = min(confidence + confidence_boost, 1.0)
                self.logger.info(f"[RAG] Boosted confidence by {confidence_boost} for personal query")

            response_data["confidence"] = confidence

            print(f"[RAG] Final confidence: {confidence}")
            print(f"[RAG] Response: {response_data.get('response', '')[:100]}...")

            return response_data

        except Exception as e:
            self.logger.error(f"[RAG] Error processing query: {e}")
            import traceback
            traceback.print_exc()
            return self._generate_error_response(str(e))

    def process_query_with_crewai(self, query: str, user_context: dict = None) -> Dict[str, Any]:
        """Enhanced query processing using CrewAI agents."""
        try:
            user_context = user_context or {}

            if not self.crewai_enabled:
                self.logger.info("[CrewAI] Not enabled, falling back to regular RAG")
                return self.process_query(
                    query,
                    user_emotion=user_context.get('emotion', 'neutral'),
                    mental_health_status=user_context.get('mental_health_status', 'Unknown'),
                    user_context=user_context
                )

            self.logger.info(f"[CrewAI] Processing with agents: {query[:50]}...")

            # Step 1: Crisis detection
            crisis_result = self.run_crisis_check(query)
            is_crisis = crisis_result.get("is_crisis", False)

            if is_crisis:
                self.logger.warning("[CrewAI] 🚨 Crisis detected")
                crisis_rec = self.run_recommendations(
                    query,
                    user_profile=json.dumps(user_context),
                    condition="Crisis",
                    answers="{}",
                    interpretation="N/A",
                    is_crisis="true"
                )
                return {
                    "response": crisis_rec.get("recommendation",
                        "🆘 Please contact emergency services immediately: 112 or National Mental Health Program: 1717"),
                    "confidence": 0.95,
                    "method": "crewai_crisis",
                    "agent": "Crisis Detection Agent",
                    "is_crisis": True,
                    "condition": "crisis",
                    "sources": []
                }

            # Step 2: Get user profile
            user_id = user_context.get('user_id', 'anon_user')
            try:
                user_profile = self.run_user_profile_retrieval(query, user_id)
            except Exception:
                user_profile = {
                    "id": user_id,
                    "name": user_context.get('name', 'User'),
                    "preferences": "General mental health support"
                }

            # Step 3: Classify condition
            try:
                condition_result = self.run_condition_classification(query, json.dumps(user_profile))
                condition = condition_result.get("condition", "general").lower()
            except Exception:
                condition = "general"

            self.logger.info(f"[CrewAI] Classified condition: {condition}")

            # Step 4: Get RAG results for context
            rag_result = self.process_query(
                query,
                user_emotion=user_context.get('emotion', 'neutral'),
                mental_health_status=user_context.get('mental_health_status', 'Unknown'),
                user_context=user_context
            )

            # Step 5: Generate final recommendation
            final_rec = self.run_recommendations(
                query,
                json.dumps(user_profile),
                condition,
                json.dumps(user_context.get('assessment_answers', {})),
                user_context.get('mental_health_status', 'Unknown'),
                is_crisis="false"
            )

            # Combine RAG and CrewAI insights
            combined_response = final_rec.get("recommendation", rag_result.get("response", ""))

            return {
                "response": combined_response,
                "confidence": max(rag_result.get("confidence", 0.5), 0.85),
                "method": "crewai_enhanced",
                "agent": "CrewAI Enhanced System",
                "sources": rag_result.get("sources", []),
                "condition": condition,
                "is_crisis": False,
                "rag_confidence": rag_result.get("confidence", 0.5)
            }

        except Exception as e:
            self.logger.error(f"[CrewAI] Processing failed: {e}")
            # Fall back to regular RAG
            user_context = user_context or {}
            return self.process_query(
                query,
                user_emotion=user_context.get('emotion', 'neutral'),
                mental_health_status=user_context.get('mental_health_status', 'Unknown'),
                user_context=user_context
            )

    def _generate_error_response(self, error_message: str) -> Dict[str, Any]:
        """Generate a fallback error response when RAG processing fails."""
        return {
            "response": "I apologize, but I'm experiencing some technical difficulties. For immediate mental health support in Bhutan, please contact:\n\n• National Mental Health Program: 1717 (24/7)\n• Emergency Services: 112\n\nYour mental health matters, and help is available.",
            "confidence": 0.0,
            "sources": [],
            "method": "error_fallback",
            "error": error_message
        }
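
An end-to-end sketch of how this class is typically wired up, under assumptions: the knowledge folder path, the query, and especially the LLM handle below are placeholders for whatever the application actually injects; only the MedicalRAG calls come from this file.

    # Illustrative sketch: ingest the knowledge folder once, then answer a query.
    from sentence_transformers import SentenceTransformer
    from config.config import Config
    from agents.rag_agent import MedicalRAG

    config = Config()                      # assumes config/rag.yaml supplies chunking/vector settings
    llm = ...                              # placeholder: the application supplies its chat LLM here
    embedder = SentenceTransformer("all-MiniLM-L6-v2")

    rag = MedicalRAG(config, llm, embedder)
    print(rag.ingest_knowledge_folder("knowledge"))

    answer = rag.process_query(
        "How can I cope with exam stress?",
        user_emotion="anxious",
        mental_health_status="Mild Stress",
        user_context={"user_id": "demo"},
    )
    print(answer["confidence"], answer["response"][:120])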
agents/rag_agent/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (17.2 kB)
agents/rag_agent/__pycache__/data_ingestion.cpython-311.pyc ADDED
Binary file (14.2 kB)
agents/rag_agent/__pycache__/document_processor.cpython-311.pyc ADDED
Binary file (27.3 kB)
agents/rag_agent/__pycache__/query_processor.cpython-311.pyc ADDED
Binary file (8.58 kB)
agents/rag_agent/__pycache__/reranker.cpython-311.pyc ADDED
Binary file (4.32 kB)
agents/rag_agent/__pycache__/response_generator.cpython-311.pyc ADDED
Binary file (20.6 kB)
agents/rag_agent/__pycache__/vector_store.cpython-311.pyc ADDED
Binary file (15 kB)
agents/rag_agent/data_ingestion.py ADDED
@@ -0,0 +1,300 @@
import os
import json
import logging
from pathlib import Path
import pandas as pd
from typing import List, Dict, Any, Optional, Union
from PyPDF2 import PdfReader

from langchain_community.document_loaders import (
    TextLoader,
    PyPDFLoader,
    CSVLoader,
    JSONLoader,
    UnstructuredMarkdownLoader
)
from langchain_core.documents import Document

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class MedicalDataIngestion:
    """
    Handles loading of various medical document formats.
    """
    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize the data ingestion pipeline.

        Args:
            config_path: Optional path to configuration file
        """
        self.logger = logging.getLogger(__name__)

        # Initialize loaders mapping
        self.loaders = {
            '.txt': self._load_text,
            '.pdf': self._load_pdf,
            '.csv': self._load_csv,
            '.json': self._load_json,
            '.md': self._load_markdown
        }

        # Initialize stats
        self.stats = {
            "files_processed": 0,
            "documents_ingested": 0,
            "errors": 0
        }

        logger.info("MedicalDataIngestion initialized")

    def load_document(self, file_path: str) -> List[Document]:
        """
        Load a document based on its file type.

        Args:
            file_path: Path to the document file

        Returns:
            List of Document objects
        """
        path = Path(file_path)
        if not path.exists():
            self.logger.error(f"File not found: {file_path}")
            return []

        suffix = path.suffix.lower()
        if suffix not in self.loaders:
            self.logger.warning(f"Unsupported file type: {suffix}")
            return []

        try:
            return self.loaders[suffix](file_path)
        except Exception as e:
            self.logger.error(f"Error loading {file_path}: {str(e)}")
            return []

    def _load_text(self, file_path: str) -> List[Document]:
        """Load text files."""
        try:
            loader = TextLoader(file_path, encoding='utf-8')
            return loader.load()
        except Exception as e:
            # Fallback to manual loading
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                doc = Document(
                    page_content=content,
                    metadata={"source": file_path, "type": "text"}
                )
                return [doc]
            except Exception as e2:
                self.logger.error(f"Error loading text file: {e2}")
                return []

    def _load_pdf(self, file_path: str) -> List[Document]:
        """Load PDF files."""
        try:
            loader = PyPDFLoader(file_path)
            return loader.load()
        except Exception as e:
            # Fallback to PyPDF2
            try:
                reader = PdfReader(file_path)
                documents = []

                for page_num, page in enumerate(reader.pages):
                    page_text = page.extract_text()
                    if page_text:
                        doc = Document(
                            page_content=page_text,
                            metadata={
                                "source": file_path,
                                "type": "pdf",
                                "page": page_num + 1
                            }
                        )
                        documents.append(doc)

                return documents
            except Exception as e2:
                self.logger.error(f"Error loading PDF file: {e2}")
                return []

    def _load_csv(self, file_path: str) -> List[Document]:
        """Load CSV files."""
        try:
            loader = CSVLoader(file_path)
            return loader.load()
        except Exception as e:
            # Fallback to pandas
            try:
                df = pd.read_csv(file_path)
                documents = []

                # Find the column with the most text content
                text_column = self._identify_content_column(df)

                for _, row in df.iterrows():
                    content = str(row[text_column])
                    metadata = {
                        "source": file_path,
                        "type": "csv"
                    }

                    # Add other columns as metadata
                    for col in df.columns:
                        if col != text_column and not pd.isna(row[col]):
                            metadata[col] = str(row[col])

                    doc = Document(
                        page_content=content,
                        metadata=metadata
                    )
                    documents.append(doc)

                return documents
            except Exception as e2:
                self.logger.error(f"Error loading CSV file: {e2}")
                return []

    def _load_json(self, file_path: str) -> List[Document]:
        """Load JSON files."""
        try:
            # For JSON files, create a document from each entry
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            documents = []
            if isinstance(data, list):
                for idx, item in enumerate(data):
                    content = json.dumps(item, indent=2)
                    doc = Document(
                        page_content=content,
                        metadata={
                            "source": file_path,
                            "type": "json",
                            "index": idx
                        }
                    )
                    documents.append(doc)
            else:
                content = json.dumps(data, indent=2)
                doc = Document(
                    page_content=content,
                    metadata={"source": file_path, "type": "json"}
                )
                documents.append(doc)

            return documents
        except Exception as e:
            self.logger.error(f"Error loading JSON file: {e}")
            return []

    def _load_markdown(self, file_path: str) -> List[Document]:
        """Load Markdown files."""
        try:
            loader = UnstructuredMarkdownLoader(file_path)
            return loader.load()
        except Exception as e:
            # Fallback to simple text loading
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                doc = Document(
                    page_content=content,
                    metadata={"source": file_path, "type": "markdown"}
                )
                return [doc]
            except Exception as e2:
                self.logger.error(f"Error loading Markdown file: {e2}")
                return []

    def load_directory(self, directory_path: str) -> List[Document]:
        """
        Load all supported documents from a directory.

        Args:
            directory_path: Path to the directory

        Returns:
            List of all loaded documents
        """
        dir_path = Path(directory_path)
        if not dir_path.exists():
            self.logger.error(f"Directory not found: {directory_path}")
            return []

        all_documents = []
        for file_path in dir_path.iterdir():
            if file_path.is_file() and file_path.suffix.lower() in self.loaders:
                self.logger.info(f"Loading {file_path.name}")
                documents = self.load_document(str(file_path))
                all_documents.extend(documents)

        return all_documents

    def _identify_content_column(self, df: pd.DataFrame) -> str:
        """
        Identify which column in a DataFrame contains the main content.

        Args:
            df: Pandas DataFrame

        Returns:
            Name of the content column
        """
        # Look for columns with these names
        content_column_names = ["content", "text", "description", "abstract", "body"]

        for name in content_column_names:
            if name in df.columns:
                return name

        # If no standard content column found, look for the column with the longest strings
        avg_lengths = {}
        for col in df.columns:
            if df[col].dtype == 'object':  # Only check string columns
                # Calculate average string length
                avg_length = df[col].astype(str).apply(len).mean()
                avg_lengths[col] = avg_length

        if avg_lengths:
            # Return column with longest average string length
            return max(avg_lengths.items(), key=lambda x: x[1])[0]

        # Fallback to first column
        return df.columns[0]

    def _identify_json_content_field(self, item: Dict) -> Optional[str]:
        """
        Identify which field in a JSON object contains the main content.

        Args:
            item: Dictionary representing a JSON object

        Returns:
            Name of the content field or None if not found
        """
        # Look for fields with these names
        content_field_names = ["content", "text", "description", "abstract", "body"]

        for name in content_field_names:
            if name in item and isinstance(item[name], str):
                return name

        # If no standard content field found, look for the field with the longest string
        text_fields = {}
        for key, value in item.items():
            if isinstance(value, str) and len(value) > 50:
                text_fields[key] = len(value)

        if text_fields:
            # Return field with longest text
            return max(text_fields.items(), key=lambda x: x[1])[0]

        return None
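
A short standalone sketch of the loader, assuming a local knowledge/ folder containing some of the supported file types; the folder name is an assumption based on the paths tracked elsewhere in this commit.

    # Illustrative sketch: load every supported file in a folder and inspect the first document.
    from agents.rag_agent.data_ingestion import MedicalDataIngestion

    ingestion = MedicalDataIngestion()
    docs = ingestion.load_directory("knowledge")   # .txt, .pdf, .csv, .json, .md are supported
    print(f"Loaded {len(docs)} documents")
    if docs:
        print(docs[0].metadata, docs[0].page_content[:200])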
agents/rag_agent/document_processor.py ADDED
@@ -0,0 +1,507 @@
1
+ import re
2
+ import uuid
3
+ import logging
4
+ from typing import List, Dict, Any, Optional, Tuple
5
+ from pathlib import Path
6
+ import hashlib
7
+ from datetime import datetime
8
+ import nltk
9
+ from nltk.tokenize import sent_tokenize
10
+ import numpy as np
11
+ from langchain.schema import Document # Add this import for Document
12
+
13
+ class MedicalDocumentProcessor:
14
+ """
15
+ Processes ingested medical/mental health documents: chunking, embedding, and metadata enrichment.
16
+ """
17
+ def __init__(self, config, embedding_model):
18
+ self.logger = logging.getLogger(__name__)
19
+ required_attrs = ["chunk_size", "chunk_overlap", "processed_docs_dir"]
20
+ for attr in required_attrs:
21
+ if not hasattr(config.rag, attr):
22
+ raise ValueError(f"Missing '{attr}' in config.rag. Please add it to config/rag.yaml.")
23
+
24
+ self.embedding_model = embedding_model
25
+ self.chunk_size = config.rag.chunk_size
26
+ self.chunk_overlap = config.rag.chunk_overlap
27
+ self.processed_docs_dir = Path(config.rag.processed_docs_dir)
28
+ self.processed_docs_dir.mkdir(parents=True, exist_ok=True)
29
+ self.chunking_strategy = getattr(config.rag, "chunking_strategy", "hybrid")
30
+ self.logger.info(f"Using chunking strategy: {self.chunking_strategy}")
31
+
32
+ # Add mental health-specific patterns
33
+ self.document_type_patterns = {
34
+ "mental_health_tip": re.compile(r"(?i)(stress|anxiety|depression|coping|therapy|mindfulness|prevention|tip|advice|support)"),
35
+ "clinical_note": re.compile(r"(?i)(chief complaint|history of present illness|hpi|past medical history|pmh|medications|assessment|plan|review of systems|ros|physical examination|lab results|imaging|impression|followup)"),
36
+ "patient_record": re.compile(r"(?i)(patient information|demographics|vital signs|allergies|immunizations|family history|social history|surgical history|problem list)"),
37
+ "treatment_guidelines": re.compile(r"(?i)(recommendations|guidelines|protocols|indications|contraindications|dosage|administration|monitoring|special populations)"),
38
+ "pharmacology": re.compile(r"(?i)(mechanism of action|pharmacokinetics|pharmacodynamics|dosing|adverse effects|warnings|interactions|storage|pregnancy considerations)"),
39
+ "general_medical": re.compile(r"(?i)(medical|health|wellness|nutrition|exercise|lifestyle|prevention|diagnosis|treatment|symptom|condition)")}
40
+
41
+ self.section_headers = [
42
+ r"^(stress|anxiety|depression|coping|therapy|mindfulness|tip|advice|support)",
43
+ r"^(chief complaint|history of present illness|hpi|past medical history|pmh|medications|assessment|plan|review of systems|ros|physical examination|lab results|imaging|impression|followup)",
44
+ r"^(patient information|demographics|vital signs|allergies|immunizations|family history|social history|surgical history|problem list)",
45
+ r"^(recommendations|guidelines|protocols|indications|contraindications|dosage|administration|monitoring|special populations)",
46
+ r"^(mechanism of action|pharmacokinetics|pharmacodynamics|dosing|adverse effects|warnings|interactions|storage|pregnancy considerations)",
47
+ r"^(medical|health|wellness|nutrition|exercise|lifestyle|prevention|diagnosis|treatment|symptom|condition)"]
48
+ filtered_headers = [header for header in self.section_headers if header.strip()]
49
+ self.section_pattern = re.compile(f"({'|'.join(filtered_headers)})", re.IGNORECASE)
50
+
51
+ # Mental health entity patterns
52
+ self.medical_entity_categories = {
53
+ "mental_health": r"(stress|anxiety|depression|suicide|coping|therapy|counseling|mindfulness|panic|self[- ]harm|hopelessness|support group|resilience|burnout|well-being|mental health)",
54
+ "symptom": r"(headache|fatigue|insomnia|mood swings|irritability|concentration issues|memory problems|appetite changes|sleep disturbances|social withdrawal)",
55
+ "treatment": r"(medication|therapy|cognitive behavioral therapy|CBT|dialectical behavior therapy|DBT|exposure therapy|medication management|psychiatric evaluation|support group|mindfulness training)",
56
+ "diagnosis": r"(bipolar disorder|schizophrenia|post-traumatic stress disorder|PTSD|obsessive[- ]compulsive disorder|OCD|generalized anxiety disorder|GAD|major depressive disorder|MDD|panic disorder|social anxiety disorder|SAD)",
57
+ "risk_factor": r"(genetic predisposition|family history|trauma|substance abuse|chronic illness|stressful life events|social isolation|poor coping skills|low resilience|lack of support)",
58
+ "intervention": r"(cognitive restructuring|exposure therapy|mindfulness meditation|relaxation techniques|stress management|problem-solving skills|assertiveness training|social skills training|self-care strategies|crisis intervention)"}
59
+ all_patterns = [f"(?P<{cat}>{pat})" for cat, pat in self.medical_entity_categories.items()]
60
+ self.medical_entity_pattern = re.compile("|".join(all_patterns), re.IGNORECASE)
61
+
62
+
63
+
64
+ def process_documents(self, documents: List[Document]) -> List[Document]:
65
+ """Process and chunk documents."""
66
+ all_chunks = []
67
+
68
+ for doc in documents:
69
+ # Get content and metadata
70
+ content = doc.page_content if hasattr(doc, 'page_content') else str(doc)
71
+ metadata = doc.metadata if hasattr(doc, 'metadata') else {}
72
+
73
+ # Create chunks based on strategy
74
+ if self.chunking_strategy == "hybrid":
75
+ chunks = self._hybrid_chunking(content, metadata)
76
+ elif self.chunking_strategy == "semantic":
77
+ chunks = self._semantic_chunking(content, metadata)
78
+ else:
79
+ chunks = self._fixed_chunking(content, metadata)
80
+
81
+ # Add embeddings to chunks
82
+ for chunk in chunks:
83
+ if not chunk.metadata.get('embedding'):
84
+ embedding = self.embedding_model.encode(chunk.page_content)
85
+ chunk.metadata['embedding'] = embedding.tolist()
86
+
87
+ all_chunks.extend(chunks)
88
+
89
+ self.logger.info(f"Processed {len(all_chunks)} chunks from {len(documents)} documents")
90
+ return all_chunks
91
+
92
+ def _detect_document_type(self, text: str) -> str:
93
+ """
94
+ Detect the type of medical document based on content patterns.
95
+
96
+ Args:
97
+ text: Document text
98
+
99
+ Returns:
100
+ Document type string
101
+ """
102
+ type_scores = {}
103
+
104
+ # Check each document type pattern
105
+ for doc_type, pattern in self.document_type_patterns.items():
106
+ matches = pattern.findall(text)
107
+ type_scores[doc_type] = len(matches)
108
+
109
+ # Find the document type with the highest number of matches
110
+ if max(type_scores.values(), default=0) > 0:
111
+ return max(type_scores.items(), key=lambda x: x[1])[0]
112
+
113
+ # Default to general if no clear type
114
+ return "general_medical"
115
+
116
+
117
+ def _split_into_paragraphs(self, text: str, section_name: str) -> List[Tuple[str, str, str]]:
118
+ """
119
+ Split text into paragraph-level chunks.
120
+
121
+ Args:
122
+ text: Text to split
123
+ section_name: Name of the section
124
+
125
+ Returns:
126
+ List of (chunk_text, section_name, level) tuples
127
+ """
128
+ paragraphs = re.split(r'\n\s*\n', text)
129
+ chunks = []
130
+
131
+ for i, para in enumerate(paragraphs):
132
+ if not para.strip():
133
+ continue
134
+
135
+ # Check if paragraph is too large
136
+ if len(para.split()) > self.chunk_size:
137
+ # Further split into sentences
138
+ sentences = sent_tokenize(para)
139
+ current_chunk = []
140
+ current_length = 0
141
+
142
+ for sentence in sentences:
143
+ sentence_length = len(sentence.split())
144
+
145
+ if current_length + sentence_length > self.chunk_size and current_chunk:
146
+ # Add current chunk
147
+ chunk_text = " ".join(current_chunk)
148
+ chunks.append((chunk_text, section_name, "paragraph"))
149
+ current_chunk = []
150
+ current_length = 0
151
+
152
+ current_chunk.append(sentence)
153
+ current_length += sentence_length
154
+
155
+ # Add final chunk if not empty
156
+ if current_chunk:
157
+ chunk_text = " ".join(current_chunk)
158
+ chunks.append((chunk_text, section_name, "paragraph"))
159
+ else:
160
+ chunks.append((para.strip(), section_name, "paragraph"))
161
+
162
+ return chunks
163
+
164
+ def _create_sliding_window_chunks(self, text: str) -> List[Tuple[str, str, str]]:
165
+ """
166
+ Create overlapping chunks using a sliding window approach.
167
+
168
+ Args:
169
+ text: Document text
170
+
171
+ Returns:
172
+ List of (chunk_text, section_name, level) tuples
173
+ """
174
+ sentences = sent_tokenize(text)
175
+ chunks = []
176
+
177
+ # If very few sentences, return as one chunk
178
+ if len(sentences) <= 3:
179
+ return [(text, "full_document", "document")]
180
+
181
+ # Calculate stride (number of sentences to slide window)
182
+ stride = max(1, (self.chunk_size - self.chunk_overlap) // 20)  # assumes roughly 20 words per sentence
183
+
184
+ # Create chunks with sliding window
185
+ for i in range(0, len(sentences), stride):
186
+ # Determine end index for current window
187
+ window_end = min(i + max(3, self.chunk_size // 20), len(sentences))
188
+
189
+ # Get text for current window
190
+ window_text = " ".join(sentences[i:window_size])
191
+
192
+ # Detect current section if possible
193
+ section_match = self.section_pattern.search(window_text)
194
+ section_name = section_match.group(0) if section_match else "sliding_window"
195
+
196
+ chunks.append((window_text, section_name, "sliding"))
197
+
198
+ return chunks
199
+
200
+ def _create_recursive_chunks(self, text: str) -> List[Tuple[str, str, str]]:
201
+ """
202
+ Create hierarchical chunks at different levels of granularity.
203
+
204
+ Args:
205
+ text: Document text
206
+
207
+ Returns:
208
+ List of (chunk_text, section_name, level) tuples
209
+ """
210
+ chunks = []
211
+
212
+ # Level 1: Document-level chunk (if not too large)
213
+ if len(text.split()) <= self.chunk_size * 2:
214
+ chunks.append((text, "full_document", "document"))
215
+
216
+ # Level 2: Section-level chunks
217
+ section_matches = list(self.section_pattern.finditer(text))
218
+
219
+ if section_matches:
220
+ for i in range(len(section_matches)):
221
+ start_pos = section_matches[i].start()
222
+ section_name = text[section_matches[i].start():section_matches[i].end()].strip()
223
+
224
+ # Determine section end
225
+ if i < len(section_matches) - 1:
226
+ end_pos = section_matches[i+1].start()
227
+ else:
228
+ end_pos = len(text)
229
+
230
+ section_text = text[start_pos:end_pos].strip()
231
+
232
+ # Add section as a chunk
233
+ if section_text and len(section_text.split()) <= self.chunk_size:
234
+ chunks.append((section_text, section_name, "section"))
235
+
236
+ # Level 3: Paragraph-level chunks
237
+ paragraphs = re.split(r'\n\s*\n', section_text)
238
+
239
+ for j, para in enumerate(paragraphs):
240
+ if para.strip() and len(para.split()) <= self.chunk_size:
241
+ chunks.append((para.strip(), section_name, "paragraph"))
242
+
243
+ # Level 4: Sentence-level chunks for important sentences
244
+ if self._contains_important_entities(para):
245
+ sentences = sent_tokenize(para)
246
+ for sentence in sentences:
247
+ if self._contains_important_entities(sentence):
248
+ chunks.append((sentence.strip(), section_name, "sentence"))
249
+ else:
250
+ # No clear sections, fall back to paragraphs and sentences
251
+ paragraphs = re.split(r'\n\s*\n', text)
252
+
253
+ for para in paragraphs:
254
+ if para.strip() and len(para.split()) <= self.chunk_size:
255
+ chunks.append((para.strip(), "paragraph", "paragraph"))
256
+
257
+ return chunks
258
+ def _embed_chunks(self, chunks: List[Document]) -> List[Document]:
259
+ """Add embeddings to chunks."""
260
+ for chunk in chunks:
261
+ if chunk.page_content:
262
+ embedding = self.embedding_model.encode(chunk.page_content)
263
+ chunk.metadata['embedding'] = embedding.tolist()
264
+ return chunks
265
+
266
+ def _hybrid_chunking(self, content: str, metadata: Dict) -> List[Document]:
267
+ """Hybrid chunking combining semantic and fixed-size approaches."""
268
+ # First apply semantic chunking
269
+ semantic_chunks = self._semantic_chunking(content, metadata)
270
+
271
+ # Then apply fixed-size chunking to large semantic chunks
272
+ final_chunks = []
273
+ for chunk in semantic_chunks:
274
+ if len(chunk.page_content) > self.chunk_size * 2:
275
+ # Break down large chunks
276
+ sub_chunks = self._fixed_chunking(chunk.page_content, chunk.metadata)
277
+ final_chunks.extend(sub_chunks)
278
+ else:
279
+ final_chunks.append(chunk)
280
+
281
+ # Add embeddings to all chunks
282
+ final_chunks = self._embed_chunks(final_chunks)
283
+
284
+ return final_chunks
285
+
286
+ def _semantic_chunking(self, content: str, metadata: Dict) -> List[Document]:
287
+ """Chunk based on semantic boundaries (paragraphs, sections)."""
288
+ # Split by double newlines for paragraphs
289
+ paragraphs = content.split('\n\n')
290
+ chunks = []
291
+ current_chunk = ""
292
+
293
+ for para in paragraphs:
294
+ if len(current_chunk) + len(para) < self.chunk_size:
295
+ current_chunk += para + "\n\n"
296
+ else:
297
+ if current_chunk:
298
+ chunks.append(Document(
299
+ page_content=current_chunk.strip(),
300
+ metadata=metadata.copy()
301
+ ))
302
+ current_chunk = para + "\n\n"
303
+
304
+ if current_chunk:
305
+ chunks.append(Document(
306
+ page_content=current_chunk.strip(),
307
+ metadata=metadata.copy()
308
+ ))
309
+
310
+ return chunks
311
+
312
+ def _fixed_chunking(self, content: str, metadata: Dict) -> List[Document]:
313
+ """Fixed-size chunking with overlap."""
314
+ chunks = []
315
+ start = 0
316
+
317
+ while start < len(content):
318
+ end = start + self.chunk_size
319
+ chunk_text = content[start:end]
320
+
321
+ # Ensure we don't cut in the middle of a word
322
+ if end < len(content) and not content[end].isspace():
323
+ # Find the last space before the end
324
+ last_space = chunk_text.rfind(' ')
325
+ if last_space > 0:
326
+ end = start + last_space
327
+ chunk_text = content[start:end]
328
+
329
+ chunks.append(Document(
330
+ page_content=chunk_text.strip(),
331
+ metadata=metadata.copy()
332
+ ))
333
+
334
+ start = end - self.chunk_overlap
335
+
336
+ return chunks
337
+
338
+ def _chunk_by_sentences(self, text: str, section_name: str, chunk_size: int) -> List[Tuple[str, str, str]]:
339
+ """
340
+ Create chunks by grouping sentences while respecting chunk size.
341
+
342
+ Args:
343
+ text: Text to chunk
344
+ section_name: Name of the section
345
+ chunk_size: Maximum chunk size in words
346
+
347
+ Returns:
348
+ List of (chunk_text, section_name, level) tuples
349
+ """
350
+ sentences = sent_tokenize(text)
351
+ chunks = []
352
+ current_chunk = []
353
+ current_length = 0
354
+
355
+ for sentence in sentences:
356
+ sentence_words = sentence.split()
357
+ sentence_length = len(sentence_words)
358
+
359
+ # If adding this sentence exceeds chunk size and we already have content
360
+ if current_length + sentence_length > chunk_size and current_chunk:
361
+ # Save current chunk
362
+ chunk_text = " ".join(current_chunk)
363
+ chunks.append((chunk_text, section_name, "sentences"))
364
+
365
+ # Start new chunk with overlap
366
+ # Find a good overlap point that doesn't split mid-thought
367
+ overlap_sentences = min(2, len(current_chunk))
368
+ current_chunk = current_chunk[-overlap_sentences:]
369
+ current_length = len(" ".join(current_chunk).split())
370
+
371
+ # Add sentence to current chunk
372
+ current_chunk.append(sentence)
373
+ current_length += sentence_length
374
+
375
+ # Add final chunk if not empty
376
+ if current_chunk:
377
+ chunk_text = " ".join(current_chunk)
378
+ chunks.append((chunk_text, section_name, "sentences"))
379
+
380
+ return chunks
381
+
382
+ def _contains_important_entities(self, text: str) -> bool:
383
+ """
384
+ Check if text contains important medical entities.
385
+
386
+ Args:
387
+ text: Text to check
388
+
389
+ Returns:
390
+ Boolean indicating presence of important entities
391
+ """
392
+ entities = self._extract_medical_entities(text)
393
+ return len(entities) > 0
394
+
395
+ def _calculate_chunk_importance(self, text: str, position: int, total_chunks: int) -> float:
396
+ """
397
+ Calculate importance score for a chunk based on various factors.
398
+
399
+ Args:
400
+ text: Chunk text
401
+ position: Position in document
402
+ total_chunks: Total number of chunks
403
+
404
+ Returns:
405
+ Importance score between 0 and 1
406
+ """
407
+ # Extract entities and count them
408
+ entities = self._extract_medical_entities(text)
409
+ entity_count = len(entities)
410
+
411
+ # Calculate entity density
412
+ word_count = len(text.split())
413
+ entity_density = entity_count / max(1, word_count / 100)
414
+
415
+ # Position importance - first and last chunks often contain key information
416
+ position_score = 0.0
417
+ if position == 0 or position == total_chunks - 1:
418
+ position_score = 0.2
419
+ elif position < total_chunks * 0.2 or position > total_chunks * 0.8:
420
+ position_score = 0.1
421
+
422
+ # Check for important keywords
423
+ keyword_score = 0.0
424
+ important_keywords = ["significant", "important", "critical", "essential", "key",
425
+ "finding", "diagnosis", "recommend", "conclude", "summary"]
426
+ for keyword in important_keywords:
427
+ if re.search(r"\b" + re.escape(keyword) + r"\b", text, re.IGNORECASE):
428
+ keyword_score += 0.05
429
+ keyword_score = min(0.2, keyword_score)
430
+
431
+ # Combine scores
432
+ importance_score = min(1.0, 0.3 * entity_density + position_score + keyword_score)
433
+
434
+ return importance_score
435
+
436
+ def _extract_medical_entities(self, text: str) -> Dict[str, List[str]]:
437
+ """
438
+ Extract medical entities from text by category.
439
+
440
+ Args:
441
+ text: Input text
442
+
443
+ Returns:
444
+ Dictionary of categorized medical entities
445
+ """
446
+ categorized_entities = {}
447
+
448
+ for category, pattern in self.medical_entity_categories.items():
449
+ category_pattern = re.compile(pattern, re.IGNORECASE)
450
+ matches = set(m.group(0).lower() for m in category_pattern.finditer(text))
451
+ if matches:
452
+ categorized_entities[category] = list(matches)
453
+
454
+ return categorized_entities
455
+
456
+ def _save_processed_chunks(self, doc_id: str, chunks: List[Dict[str, Any]]):
457
+ """
458
+ Save processed chunks to disk for potential reuse.
459
+
460
+ Args:
461
+ doc_id: Document identifier
462
+ chunks: List of processed chunks
463
+ """
464
+ try:
465
+ import json
466
+
467
+ # Create filename
468
+ filename = f"{doc_id}_processed.json"
469
+ filepath = self.processed_docs_dir / filename
470
+
471
+ # Save chunks without embeddings (to save space)
472
+ chunks_without_embeddings = []
473
+ for chunk in chunks:
474
+ chunk_copy = chunk.copy()
475
+ # Remove embedding as it's large and can be regenerated
476
+ chunk_copy.pop("embedding", None)
477
+ chunks_without_embeddings.append(chunk_copy)
478
+
479
+ with open(filepath, 'w') as f:
480
+ json.dump(chunks_without_embeddings, f)
481
+
482
+ self.logger.info(f"Saved processed chunks to {filepath}")
483
+ except Exception as e:
484
+ self.logger.warning(f"Failed to save processed chunks: {e}")
485
+
486
+ def batch_process_documents(self, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
487
+ """
488
+ Process a batch of documents.
489
+
490
+ Args:
491
+ documents: List of dictionaries with 'content' and 'metadata' keys
492
+
493
+ Returns:
494
+ List of processed document chunks with embeddings
495
+ """
496
+ all_processed_chunks = []
497
+
498
+ for doc in documents:
499
+ try:
500
+ processed_chunks = self.process_document(doc["content"], doc["metadata"])
501
+ all_processed_chunks.extend(processed_chunks)
502
+ except Exception as e:
503
+ self.logger.error(f"Error processing document: {e}")
504
+ # Continue with the next document
505
+ continue
506
+
507
+ return all_processed_chunks
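A minimal driver sketch for the chunking pipeline above. It assumes the processor class defined in this file has already been instantiated as `processor` (its constructor is not shown in this hunk) with a config supplying `chunk_size`, `chunk_overlap`, and `chunking_strategy`, plus a SentenceTransformer as `embedding_model`; the sample document text and source name are illustrative only.

from langchain_core.documents import Document

docs = [
    Document(
        page_content=(
            "Generalized anxiety disorder involves persistent, excessive worry.\n\n"
            "Cognitive behavioral therapy and mindfulness training are common treatments."
        ),
        metadata={"source": "example_note.txt"},  # hypothetical source name
    )
]

# process_documents applies the configured strategy (hybrid/semantic/fixed)
# and attaches an embedding to every chunk's metadata.
chunks = processor.process_documents(docs)
for chunk in chunks:
    print(len(chunk.metadata["embedding"]), chunk.page_content[:60])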
agents/rag_agent/query_processor.py ADDED
@@ -0,0 +1,142 @@
1
+ import logging
2
+ import re
3
+ import uuid
4
+ from typing import List, Dict, Any, Optional, Tuple, Union
5
+ from datetime import datetime
6
+
7
+ class QueryProcessor:
8
+ """
9
+ Processor for mental health queries with entity extraction and specialty detection.
10
+ """
11
+ def __init__(self, config, embedding_model):
12
+ self.logger = logging.getLogger(__name__)
13
+ self.embedding_model = embedding_model
14
+ self.config = config
15
+
16
+ # Only mental health entity patterns
17
+ self.medical_entity_categories = {
18
+ "mental_health": r"(stress|anxiety|depression|suicide|coping|therapy|counseling|mindfulness|panic|self[- ]harm|hopelessness|support group|resilience|burnout|well-being|mental health)",
19
+ "symptom": r"(insomnia|mood swings|irritability|concentration issues|memory problems|appetite changes|sleep disturbances|social withdrawal|fatigue|hopelessness)",
20
+ "treatment": r"(medication|therapy|cognitive behavioral therapy|CBT|dialectical behavior therapy|DBT|exposure therapy|medication management|psychiatric evaluation|support group|mindfulness training)",
21
+ "diagnosis": r"(bipolar disorder|schizophrenia|post-traumatic stress disorder|PTSD|obsessive[- ]compulsive disorder|OCD|generalized anxiety disorder|GAD|major depressive disorder|MDD|panic disorder|social anxiety disorder|SAD)",
22
+ "risk_factor": r"(genetic predisposition|family history|trauma|substance abuse|chronic illness|stressful life events|social isolation|poor coping skills|low resilience|lack of support)",
23
+ "intervention": r"(cognitive restructuring|exposure therapy|mindfulness meditation|relaxation techniques|stress management|problem-solving skills|assertiveness training|social skills training|self-care strategies|crisis intervention)"
24
+ }
25
+
26
+ # Only mental health specialty keywords
27
+ self.specialty_keywords = {
28
+ "psychiatry": [
29
+ "mental health", "depression", "anxiety", "psychiatric", "disorder",
30
+ "schizophrenia", "bipolar", "therapy", "behavioral", "psychological",
31
+ "stress", "counseling", "mindfulness", "panic", "self-harm", "support group",
32
+ "resilience", "burnout", "well-being"
33
+ ]
34
+ }
35
+
36
+ def process_query(self, query: str) -> Dict[str, Any]:
37
+ """Process query to extract metadata and intent - NOT THE FULL RAG PIPELINE."""
38
+ try:
39
+ # Extract medical entities
40
+ entities = self._extract_medical_entities(query)
41
+
42
+ # Determine query intent
43
+ intent = self._determine_query_intent(query)
44
+
45
+ # Normalize query to handle typos and variations
46
+ normalized_query = self._normalize_query(query)
47
+
48
+ # Determine medical specialty
49
+ specialty = self._detect_specialty(query)
50
+
51
+ # Create metadata/filters for retrieval
52
+ filters = {
53
+ 'query_id': str(uuid.uuid4()),
54
+ 'timestamp': datetime.now().isoformat(),
55
+ 'query_intent': intent,
56
+ 'medical_entities': entities,
57
+ 'specialty': specialty
58
+ }
59
+
60
+ self.logger.info(f"Processed query with filters: {filters}")
61
+
62
+ return {
63
+ 'filters': filters,
64
+ 'intent': intent,
65
+ 'entities': entities,
66
+ 'expanded_query': self._expand_query(query)
67
+ }
68
+
69
+ except Exception as e:
70
+ self.logger.error(f"Error processing query: {e}")
71
+ return self._get_default_metadata()
72
+
73
+
74
+
75
+ def _normalize_query(self, query: str) -> str:
76
+ """Normalize query to handle common typos and variations."""
77
+ import re
78
+
79
+ # Convert to lowercase
80
+ normalized = query.lower()
81
+
82
+ # Fix common typos or abbreviations
83
+ typo_corrections = {
84
+ r'\brad\b': 'road', # rad -> road
85
+ r'\bdepressed\b': 'depression',
86
+ r'\bim\b': "i'm",
87
+ r'\bu\b': 'you',
88
+ r'\bur\b': 'your',
89
+ }
90
+
91
+ for pattern, replacement in typo_corrections.items():
92
+ normalized = re.sub(pattern, replacement, normalized)
93
+
94
+ return normalized
95
+
96
+ def _expand_query(self, query: str) -> str:
97
+ """Expand query with related terms."""
98
+ expansions = {
99
+ "depression": "depression mood sadness hopelessness",
100
+ "anxiety": "anxiety worry nervousness panic",
101
+ "stress": "stress pressure tension burnout",
102
+ "therapy": "therapy counseling psychotherapy CBT DBT",
103
+ "suicide": "suicide self-harm hopelessness crisis",
104
+ "support": "support group counseling help"
105
+ }
106
+ expanded = query
107
+ for term, expansion in expansions.items():
108
+ if re.search(r"\b" + re.escape(term) + r"\b", query.lower()):
109
+ expanded = f"{expanded} {expansion}"
110
+ return expanded
111
+
112
+ def _extract_medical_entities(self, text: str) -> Dict[str, List[str]]:
113
+ """Extract categorized medical entities from text."""
114
+ categorized_entities = {}
115
+ for category, pattern in self.medical_entity_categories.items():
116
+ category_pattern = re.compile(pattern, re.IGNORECASE)
117
+ matches = set(m.group(0).lower() for m in category_pattern.finditer(text))
118
+ if matches:
119
+ categorized_entities[category] = list(matches)
120
+ return categorized_entities
121
+
122
+ def _detect_specialty(self, text: str) -> Optional[str]:
123
+ """Detect medical specialty from text."""
124
+ text_lower = text.lower()
125
+ for specialty, keywords in self.specialty_keywords.items():
126
+ for keyword in keywords:
127
+ if re.search(r"\b" + re.escape(keyword.lower()) + r"\b", text_lower):
128
+ return specialty
129
+ return None
130
+
131
+ def _determine_query_intent(self, text: str) -> str:
132
+ """Determine the intent of the query."""
133
+ text_lower = text.lower()
134
+ if re.search(r"\b(what is|define|explain|describe|meaning of)\b", text_lower):
135
+ return "definition"
136
+ elif re.search(r"\b(treat|therapy|medication|cure|manage|drug|prescription)\b", text_lower):
137
+ return "treatment"
138
+ elif re.search(r"\b(diagnose|diagnostic|symptom|sign|identify|determine)\b", text_lower):
139
+ return "diagnosis"
140
+ elif re.search(r"\b(prevent|preventive|avoid|risk factor|reduction)\b", text_lower):
141
+ return "prevention"
142
+ return "general_information"
agents/rag_agent/reranker.py ADDED
@@ -0,0 +1,69 @@
1
+ import logging
2
+ from typing import List, Dict, Any
3
+ from sentence_transformers import CrossEncoder
4
+ from langchain_core.documents import Document
5
+
6
+ class Reranker:
7
+ """
8
+ Reranks retrieved documents using a cross-encoder model for more accurate results.
9
+ """
10
+ def __init__(self, config):
11
+ """
12
+ Initialize the reranker with configuration.
13
+ Args:
14
+ config: Configuration object containing reranker settings
15
+ """
16
+ self.logger = logging.getLogger(__name__)
17
+ try:
18
+ if not hasattr(config.rag, "reranker_model"):
19
+ raise ValueError("Missing 'reranker_model' in config.rag. Please add it to config/rag.yaml.")
20
+ self.model_name = config.rag.reranker_model
21
+ self.logger.info(f"Loading reranker model: {self.model_name}")
22
+ self.model = CrossEncoder(self.model_name)
23
+ self.top_k = getattr(config.rag, "reranker_top_k", 5)
24
+ except Exception as e:
25
+ self.logger.error(f"Error loading reranker model: {e}")
26
+ raise
27
+
28
+ def rerank(self, query: str, documents: List[Document], top_k: int = 5) -> List[Document]:
29
+ """
30
+ Rerank documents based on relevance to the query.
31
+
32
+ Args:
33
+ query: The user query
34
+ documents: List of Document objects to rerank
35
+ top_k: Number of top documents to return
36
+
37
+ Returns:
38
+ List of reranked Document objects
39
+ """
40
+ if not documents:
41
+ return []
42
+
43
+ try:
44
+ # Create pairs of (query, document_content) for the reranker
45
+ pairs = []
46
+ for doc in documents:
47
+ # Use doc.page_content instead of doc['content']
48
+ content = doc.page_content if hasattr(doc, 'page_content') else str(doc)
49
+ pairs.append([query, content])
50
+
51
+ # Get scores from the reranker
52
+ scores = self.model.predict(pairs)
53
+
54
+ # Combine documents with their scores
55
+ doc_scores = list(zip(documents, scores))
56
+
57
+ # Sort by score (descending)
58
+ doc_scores.sort(key=lambda x: x[1], reverse=True)
59
+
60
+ # Return top_k documents
61
+ reranked_docs = [doc for doc, score in doc_scores[:top_k]]
62
+
63
+ self.logger.info(f"Reranked {len(documents)} documents, returning top {len(reranked_docs)}")
64
+ return reranked_docs
65
+
66
+ except Exception as e:
67
+ self.logger.error(f"Error during reranking: {e}")
68
+ self.logger.warning("Falling back to original ranking")
69
+ return documents[:top_k]
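A hedged usage sketch for the Reranker. The CrossEncoder checkpoint named below is only an example of a value `config.rag.reranker_model` could hold (the project reads it from config/rag.yaml), and the two documents are toy inputs.

from types import SimpleNamespace
from langchain_core.documents import Document

config = SimpleNamespace(rag=SimpleNamespace(
    reranker_model="cross-encoder/ms-marco-MiniLM-L-6-v2",  # example checkpoint, not mandated by this repo
    reranker_top_k=5,
))

docs = [
    Document(page_content="CBT is an evidence-based treatment for depression.", metadata={"source": "a"}),
    Document(page_content="Tips for improving sleep hygiene during shift work.", metadata={"source": "b"}),
]

reranker = Reranker(config)
top_docs = reranker.rerank("How is depression treated?", docs, top_k=1)
print(top_docs[0].metadata["source"])  # expected: "a"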
agents/rag_agent/response_generator.py ADDED
@@ -0,0 +1,326 @@
1
+ # File: agents/rag_agent/response_generator.py
2
+
3
+ import logging
4
+ from typing import List, Dict, Any, Optional
5
+ from langchain_core.documents import Document
6
+
7
+ class ResponseGenerator:
8
+ """
9
+ Generates structured responses with empathy, solution, and recommendations using RAG pipeline.
10
+ """
11
+ def __init__(self, config=None, llm=None):
12
+ """Initialize ResponseGenerator with optional config and LLM"""
13
+ self.logger = logging.getLogger(__name__)
14
+ self.config = config
15
+ self.llm = llm
16
+
17
+ # Set default values if config is provided
18
+ if config and hasattr(config, 'rag'):
19
+ self.max_context_length = getattr(config.rag, "max_context_length", 2048)
20
+ self.include_sources = getattr(config.rag, "include_sources", True)
21
+ else:
22
+ self.max_context_length = 2048
23
+ self.include_sources = True
24
+
25
+ def generate_response(self, query: str, retrieved_docs: List[Any],
26
+ chat_history: Optional[str] = None,
27
+ user_emotion: Optional[str] = None,
28
+ mental_health_status: Optional[str] = None,
29
+ user_context: Optional[Dict] = None) -> Dict[str, Any]:
30
+ """Generate structured response using RAG pipeline with guaranteed structure."""
31
+
32
+ try:
33
+ print(f"[ResponseGenerator] Processing: {query[:50]}...")
34
+ print(f"[ResponseGenerator] Emotion: {user_emotion}, Status: {mental_health_status}")
35
+
36
+ # Extract sources from documents
37
+ sources = self._extract_sources(retrieved_docs)
38
+
39
+ # Build context from retrieved documents
40
+ context = self._build_context_from_docs(retrieved_docs)
41
+
42
+ # Get user info
43
+ emotion = user_emotion or "neutral"
44
+ status = mental_health_status or "Unknown"
45
+ message_count = user_context.get('message_count', 1) if user_context else 1
46
+
47
+ # Try RAG-enhanced structured response first
48
+ try:
49
+ print("[ResponseGenerator] Generating RAG-enhanced structured response...")
50
+ response_text = self._generate_rag_structured_response(
51
+ query, context, emotion, status, message_count
52
+ )
53
+
54
+ # Verify structure is present
55
+ if self._verify_response_structure(response_text):
56
+ print("[ResponseGenerator] RAG response has complete structure")
57
+ else:
58
+ print("[ResponseGenerator] RAG response missing structure, enhancing...")
59
+ response_text = self._enhance_with_guaranteed_structure(
60
+ response_text, query, emotion, status
61
+ )
62
+
63
+ except Exception as llm_error:
64
+ print(f"[ResponseGenerator] LLM generation failed: {llm_error}")
65
+ print("[ResponseGenerator] Using guaranteed structured fallback...")
66
+ response_text = self._build_structured_response(query, emotion, status)
67
+
68
+ confidence = self._calculate_confidence(sources)
69
+
70
+ print(f"[ResponseGenerator] Final response: {response_text[:100]}...")
71
+
72
+ return {
73
+ "response": response_text,
74
+ "sources": sources,
75
+ "confidence": confidence
76
+ }
77
+
78
+ except Exception as e:
79
+ self.logger.error(f"Error generating response: {e}")
80
+ return self._generate_guaranteed_structure(query, user_emotion, mental_health_status, user_context)
81
+
82
+ def _build_context_from_docs(self, retrieved_docs: List[Any]) -> str:
83
+ """Build context from RAG pipeline retrieved documents."""
84
+ context_parts = []
85
+
86
+ for i, doc in enumerate(retrieved_docs[:3]):
87
+ content = ""
88
+ if hasattr(doc, 'page_content'):
89
+ content = doc.page_content
90
+ elif isinstance(doc, dict):
91
+ content = doc.get('content', doc.get('page_content', ''))
92
+ else:
93
+ content = str(doc)
94
+
95
+ if content:
96
+ # Truncate for context window
97
+ truncated_content = content[:400] + "..." if len(content) > 400 else content
98
+ context_parts.append(f"[Document {i+1}]\n{truncated_content}")
99
+
100
+ return "\n\n".join(context_parts) if context_parts else "No specific context available."
101
+
102
+ def _generate_rag_structured_response(self, query: str, context: str, emotion: str, status: str, message_count: int) -> str:
103
+ """Generate response using RAG context with structured prompt."""
104
+
105
+ if not self.llm:
106
+ # Fallback if no LLM is provided
107
+ return self._build_structured_response(query, emotion, status)
108
+
109
+ structured_prompt = f"""You are a compassionate mental health support assistant. Using the provided context, create a response with EXACTLY 3 sections:
110
+
111
+ USER QUERY: "{query}"
112
+ USER EMOTION: {emotion}
113
+ MENTAL HEALTH STATUS: {status}
114
+ MESSAGE COUNT: {message_count}
115
+
116
+ CONTEXT FROM RAG PIPELINE:
117
+ {context}
118
+
119
+ CRITICAL: Your response MUST have ALL THREE sections in this order:
120
+
121
+ 1. EMPATHY/ACKNOWLEDGEMENT (Start with "I understand..." or "I hear..." or "I can see..."):
122
+ - Acknowledge their specific feelings from the query
123
+ - Validate their experience
124
+ - Show understanding and support
125
+
126
+ 2. SOLUTION/INFORMATION (Include words like "can help", "try", "practice", "research shows"):
127
+ - Use the context to provide relevant information about their concern
128
+ - Explain what might be happening or why they feel this way
129
+ - Offer evidence-based insights or coping strategies
130
+
131
+ 3. RECOMMENDATIONS (Include words like "recommend", "consider", "suggest"):
132
+ - Give concrete next steps based on their status ({status})
133
+ - Suggest professional help if needed
134
+ - Provide specific actions they can take
135
+
136
+ Use the RAG context to make your response more informative and specific. Keep it 6-9 sentences total. Be warm and conversational, not clinical.
137
+
138
+ Response:"""
139
+
140
+ try:
141
+ response = self.llm.invoke(structured_prompt)
142
+ return response.content if hasattr(response, 'content') else str(response)
143
+ except Exception as e:
144
+ print(f"Error invoking LLM: {e}")
145
+ return self._build_structured_response(query, emotion, status)
146
+
147
+ def _verify_response_structure(self, response_text: str) -> bool:
148
+ """Verify the response has all three required sections."""
149
+
150
+ # Check for empathy keywords
151
+ has_empathy = any(word in response_text.lower() for word in [
152
+ 'understand', 'hear', 'see', 'sorry', 'valid', 'difficult', 'acknowledge'
153
+ ])
154
+
155
+ # Check for solution keywords
156
+ has_solution = any(word in response_text.lower() for word in [
157
+ 'try', 'practice', 'can help', 'technique', 'strategy', 'approach',
158
+ 'research shows', 'studies', 'evidence'
159
+ ])
160
+
161
+ # Check for recommendation keywords
162
+ has_recommendations = any(word in response_text.lower() for word in [
163
+ 'recommend', 'consider', 'suggest', 'professional', 'counselor',
164
+ 'therapist', 'healthcare'
165
+ ])
166
+
167
+ print(f"[Structure Check] Empathy: {has_empathy}, Solution: {has_solution}, Recommendations: {has_recommendations}")
168
+
169
+ return has_empathy and has_solution and has_recommendations
170
+
171
+ def _enhance_with_guaranteed_structure(self, partial_response: str, query: str, emotion: str, status: str) -> str:
172
+ """Enhance partial response to ensure complete structure."""
173
+
174
+ # Analyze what's missing and add it
175
+ has_empathy = any(word in partial_response.lower() for word in ['understand', 'hear', 'sorry', 'valid'])
176
+ has_solution = any(word in partial_response.lower() for word in ['try', 'practice', 'can help', 'strategy'])
177
+ has_recommendations = any(word in partial_response.lower() for word in ['recommend', 'consider', 'suggest'])
178
+
179
+ enhanced_parts = []
180
+
181
+ # Add empathy if missing
182
+ if not has_empathy:
183
+ empathy = self._generate_empathy_section(query, emotion)
184
+ enhanced_parts.append(empathy)
185
+
186
+ # Add the existing response
187
+ enhanced_parts.append(partial_response)
188
+
189
+ # Add solution if missing
190
+ if not has_solution:
191
+ solution = self._generate_solution_section(query, emotion)
192
+ enhanced_parts.append(solution)
193
+
194
+ # Add recommendations if missing
195
+ if not has_recommendations:
196
+ recommendations = self._generate_recommendations_section(query, status)
197
+ enhanced_parts.append(recommendations)
198
+
199
+ return " ".join(enhanced_parts)
200
+
201
+ def _generate_empathy_section(self, query: str, emotion: str) -> str:
202
+ """Generate empathy section based on query."""
203
+ query_lower = query.lower()
204
+
205
+ if "stress" in query_lower and ("school" in query_lower or "work" in query_lower):
206
+ return "I understand that you're feeling overwhelmed by academic/work pressure, and these feelings are completely valid."
207
+ elif "anxiety" in query_lower:
208
+ return "I hear that anxiety is making things really challenging for you right now."
209
+ elif "sad" in query_lower or "depressed" in query_lower:
210
+ return "I can see that you're going through a difficult time with these heavy feelings."
211
+ else:
212
+ return f"I understand that you're dealing with {emotion} feelings, and I want you to know your experience is valid."
213
+
214
+ def _generate_solution_section(self, query: str, emotion: str) -> str:
215
+ """Generate solution section based on query."""
216
+ query_lower = query.lower()
217
+
218
+ if "stress" in query_lower and "school" in query_lower:
219
+ return "Academic stress can be managed through time management techniques and breaking large tasks into smaller, manageable steps."
220
+ elif "anxiety" in query_lower:
221
+ return "Anxiety can be helped through breathing techniques and grounding exercises that activate your body's relaxation response."
222
+ else:
223
+ return "There are proven strategies that can help you manage these feelings and improve your well-being over time."
224
+
225
+ def _generate_recommendations_section(self, query: str, status: str) -> str:
226
+ # """Generate recommendations based on status and query."""
227
+
228
+ if status == "Severe":
229
+ # return "I strongly recommend reaching out to a mental health professional immediately, and consider calling 988 if you need crisis support."
230
+ # elif "school" in query.lower():
231
+ # return "Consider speaking with a school counselor and practicing stress-reduction techniques like regular breaks and exercise."
232
+ else:
233
+ # return "I recommend considering professional support and incorporating daily stress-reduction activities into your routine."
234
+
235
+ def _extract_sources(self, retrieved_docs: List[Any]) -> List[Dict]:
236
+ """Extract sources from retrieved documents."""
237
+ sources = []
238
+
239
+ for i, doc in enumerate(retrieved_docs[:3]):
240
+ if hasattr(doc, 'page_content'):
241
+ content = doc.page_content
242
+ metadata = doc.metadata if hasattr(doc, 'metadata') else {}
243
+ source = metadata.get('source', f'Document {i+1}')
244
+ score = metadata.get('score', 0.5)
245
+ elif isinstance(doc, dict):
246
+ content = doc.get('content', doc.get('page_content', ''))
247
+ metadata = doc.get('metadata', {})
248
+ source = metadata.get('source', doc.get('source', f'Document {i+1}'))
249
+ score = doc.get('score', metadata.get('score', 0.5))
250
+ else:
251
+ content = str(doc)
252
+ source = f'Document {i+1}'
253
+ score = 0.5
254
+
255
+ if content:
256
+ sources.append({
257
+ "source": source,
258
+ "score": float(score),
259
+ "snippet": content[:100] + "..." if len(content) > 100 else content
260
+ })
261
+
262
+ return sources
263
+
264
+ def _build_structured_response(self, query: str, emotion: str, status: str) -> str:
265
+ """Build guaranteed structured response (fallback method)."""
266
+
267
+ query_lower = query.lower()
268
+
269
+ print(f"[StructureBuilder] Building guaranteed structure for: {query_lower[:30]}...")
270
+
271
+ # 1. EMPATHY/ACKNOWLEDGMENT
272
+ if "sad" in query_lower and ("depressed" in query_lower or "depression" in query_lower):
273
+ empathy = "I understand you're going through a really difficult time with sadness and depression. These feelings can be overwhelming and exhausting, and I want you to know that reaching out shows real strength."
274
+ elif "stress" in query_lower and ("school" in query_lower or "work" in query_lower):
275
+ empathy = "I hear that you're feeling really stressed about your school/work responsibilities. Academic and work pressure can be overwhelming, and it's completely valid to feel this way."
276
+ elif "anxiety" in query_lower or "anxious" in query_lower:
277
+ empathy = "I understand that anxiety can feel incredibly overwhelming and scary. What you're experiencing is very real, and your struggle with this is completely valid."
278
+ else:
279
+ empathy = f"I hear that you're dealing with {emotion} feelings, and I want you to know that what you're experiencing is valid and understandable."
280
+
281
+ # 2. SOLUTION/INFORMATION
282
+ if "stress" in query_lower and ("school" in query_lower or "work" in query_lower):
283
+ solution = "Academic and work stress can be managed through time management techniques, breaking large tasks into smaller steps, and practicing stress-reduction activities. Research shows that regular breaks and boundary-setting can help you regain control."
284
+ elif "anxiety" in query_lower or "anxious" in query_lower:
285
+ solution = "Anxiety is highly treatable through various approaches including breathing techniques, grounding exercises, and cognitive strategies. Practice deep breathing (inhale for 4, hold for 4, exhale for 6) to help activate your body's relaxation response."
286
+ elif "sad" in query_lower and ("depressed" in query_lower or "depression" in query_lower):
287
+ solution = "Depression involves complex brain chemistry changes that affect mood, energy, and motivation. Research shows that combining professional support with self-care practices can help improve symptoms over time."
288
+ else:
289
+ solution = "There are proven strategies and techniques that can help you manage these feelings and improve your overall mental well-being through consistent practice and the right support."
290
+
291
+ # 3. RECOMMENDATIONS
292
+ if status == "Severe":
293
+ recommendations = "I strongly recommend reaching out to a mental health professional immediately for proper assessment and support. You can also call the crisis helpline at 988 if you need immediate assistance."
294
+ elif "school" in query_lower or "work" in query_lower:
295
+ recommendations = "Consider speaking with a counselor about stress management, practice setting boundaries with your workload, and explore stress-reduction activities like regular exercise or meditation that fit your schedule."
296
+ else:
297
+ recommendations = "Consider speaking with a mental health professional for personalized guidance and support. You might also try incorporating stress-reduction activities like deep breathing exercises, regular physical activity, or journaling into your routine."
298
+
299
+ final_response = f"{empathy} {solution} {recommendations}"
300
+ print(f"[StructureBuilder] Built guaranteed response with {len(final_response)} characters")
301
+
302
+ return final_response
303
+
304
+ def _calculate_confidence(self, sources: List[Dict[str, Any]]) -> float:
305
+ """Calculate confidence based on sources."""
306
+ if not sources:
307
+ return 0.4
308
+
309
+ scores = [s.get('score', 0) for s in sources[:3]]
310
+ avg_score = sum(scores) / len(scores) if scores else 0.0
311
+
312
+ if len(sources) >= 3 and avg_score > 0.5:
313
+ return min(avg_score * 1.2, 1.0)
314
+
315
+ return max(avg_score, 0.4)
316
+
317
+ def _generate_guaranteed_structure(self, query: str, emotion: str, status: str, user_context: Dict) -> Dict[str, Any]:
318
+ """Generate fallback response with guaranteed structure."""
319
+
320
+ structured_response = self._build_structured_response(query, emotion or "concerned", status or "Unknown")
321
+
322
+ return {
323
+ "response": structured_response,
324
+ "sources": [],
325
+ "confidence": 0.4
326
+ }
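A quick sketch of the no-LLM fallback path: constructed without a config or LLM and given no retrieved documents, generate_response falls through to the guaranteed-structure builder and reports the floor confidence of 0.4. The query text is illustrative.

generator = ResponseGenerator()          # no config, no LLM
result = generator.generate_response(
    query="I'm so stressed about school, I can't keep up",
    retrieved_docs=[],                   # nothing retrieved, so no sources
    user_emotion="stressed",
    mental_health_status="Moderate",
)

print(result["confidence"])              # 0.4 when there are no sources
print(result["response"])                # empathy + solution + recommendations paragraph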
agents/rag_agent/vector_store.py ADDED
@@ -0,0 +1,298 @@
1
+ from typing import List, Dict, Any, Optional, Union
2
+ import logging
3
+ import uuid
4
+ import numpy as np
5
+ from qdrant_client import QdrantClient
6
+ from qdrant_client.http import models as qdrant_models
7
+ from qdrant_client.http.exceptions import UnexpectedResponse
8
+
9
+ # Import Document if available, otherwise define a minimal stub
10
+ try:
11
+ from langchain.schema import Document
12
+ except ImportError:
13
+ class Document:
14
+ def __init__(self, page_content, metadata):
15
+ self.page_content = page_content
16
+ self.metadata = metadata
17
+
18
+ class QdrantRetriever:
19
+ """
20
+ Handles storage and retrieval of medical documents using Qdrant vector database.
21
+ """
22
+ def __init__(self, config):
23
+ """
24
+ Initialize the Qdrant retriever with configuration.
25
+ Args:
26
+ config: Configuration object containing Qdrant settings
27
+ """
28
+ self.logger = logging.getLogger(__name__)
29
+ self.collection_name = config.rag.collection_name
30
+ self.embedding_dim = config.rag.embedding_dim
31
+ self.distance_metric = config.rag.distance_metric
32
+
33
+ # Force in-memory mode for now
34
+ self.client = QdrantClient(":memory:")
35
+
36
+
37
+ # # Initialize Qdrant client
38
+ # if getattr(config.rag, "use_local", True):
39
+ # self.client = QdrantClient(
40
+ # path=config.rag.local_path
41
+ # )
42
+ # else:
43
+ # self.client = QdrantClient(
44
+ # url=getattr(config.rag, "url", None),
45
+ # api_key=getattr(config.rag, "api_key", None),
46
+ # )
47
+
48
+ # Ensure collection exists
49
+ self._ensure_collection()
50
+
51
+
52
+
53
+ def _ensure_collection(self):
54
+ """Create collection if it doesn't exist."""
55
+ try:
56
+ collections = self.client.get_collections().collections
57
+ collection_names = [collection.name for collection in collections]
58
+ if self.collection_name not in collection_names:
59
+ self.logger.info(f"Creating new collection: {self.collection_name}")
60
+ self.client.create_collection(
61
+ collection_name=self.collection_name,
62
+ vectors_config=qdrant_models.VectorParams(
63
+ size=self.embedding_dim,
64
+ distance=self.distance_metric,
65
+ ),
66
+ optimizers_config=qdrant_models.OptimizersConfigDiff(
67
+ indexing_threshold=10000,
68
+ ),
69
+ )
70
+ self.logger.info(f"Collection {self.collection_name} created successfully")
71
+ except Exception as e:
72
+ self.logger.error(f"Error creating collection: {e}")
73
+ raise
74
+
75
+ def upsert_documents(self, documents: List[Dict[str, Any]]):
76
+ """
77
+ Insert or update documents in the vector database.
78
+ Args:
79
+ documents: List of document dictionaries containing:
80
+ - id: Unique identifier
81
+ - embedding: Vector embedding
82
+ - metadata: Document metadata
83
+ - content: Document content
84
+ """
85
+ try:
86
+ points = []
87
+ for doc in documents:
88
+ points.append(
89
+ qdrant_models.PointStruct(
90
+ id=doc["id"],
91
+ vector=doc["embedding"],
92
+ payload={
93
+ "content": doc["content"],
94
+ "source": doc["metadata"].get("source", ""),
95
+ "specialty": doc["metadata"].get("specialty", ""),
96
+ "section": doc["metadata"].get("section", ""),
97
+ "publication_date": doc["metadata"].get("publication_date", ""),
98
+ "medical_entities": doc["metadata"].get("medical_entities", []),
99
+ "chunk_number": doc["metadata"].get("chunk_number", 0),
100
+ "total_chunks": doc["metadata"].get("total_chunks", 1),
101
+ }
102
+ )
103
+ )
104
+ self.client.upsert(
105
+ collection_name=self.collection_name,
106
+ points=points,
107
+ wait=True
108
+ )
109
+ self.logger.info(f"Successfully upserted {len(documents)} documents")
110
+ except Exception as e:
111
+ self.logger.error(f"Error upserting documents: {e}")
112
+ raise
113
+
114
+ # Update the retrieve method to properly return Document objects:
115
+ def retrieve(self, query_embedding: np.ndarray, top_k: int = 5, **kwargs) -> List[Document]:
116
+ """
117
+ Retrieve similar documents based on query embedding.
118
+
119
+ Args:
120
+ query_embedding: Query vector
121
+ top_k: Number of results to return
122
+ **kwargs: Additional parameters (for compatibility)
123
+
124
+ Returns:
125
+ List of Document objects
126
+ """
127
+ try:
128
+ # Handle the case where query_embedding might be passed as a dict
129
+ if isinstance(query_embedding, dict):
130
+ # If it's a dict, it might be from query_processor
131
+ # Extract the actual embedding
132
+ if 'embedding' in query_embedding:
133
+ query_embedding = query_embedding['embedding']
134
+ else:
135
+ self.logger.error(f"Invalid query_embedding format: {type(query_embedding)}")
136
+ return []
137
+
138
+ # Ensure query_embedding is a list
139
+ if isinstance(query_embedding, np.ndarray):
140
+ query_vector = query_embedding.tolist()
141
+ else:
142
+ query_vector = list(query_embedding)
143
+
144
+ # Search in Qdrant
145
+ search_results = self.client.search(
146
+ collection_name=self.collection_name,
147
+ query_vector=query_vector,
148
+ limit=top_k
149
+ )
150
+
151
+ # Convert to Document objects
152
+ documents = []
153
+ for result in search_results:
154
+ # Extract content from payload
155
+ content = result.payload.get('content', '')
156
+
157
+ # Create metadata including the score
158
+ metadata = {k: v for k, v in result.payload.items() if k != 'content'}
159
+ metadata['score'] = result.score
160
+ metadata['id'] = str(result.id)
161
+
162
+ # Create Document object
163
+ doc = Document(
164
+ page_content=content,
165
+ metadata=metadata
166
+ )
167
+ documents.append(doc)
168
+
169
+ self.logger.info(f"Retrieved {len(documents)} documents for query")
170
+ return documents
171
+
172
+ except Exception as e:
173
+ self.logger.error(f"Error retrieving documents: {e}")
174
+ import traceback
175
+ self.logger.error(f"Traceback: {traceback.format_exc()}")
176
+ return []
177
+
178
+ def delete_documents(self, document_ids: List[Union[str, int]]):
179
+ """
180
+ Delete documents from the vector database by their IDs.
181
+ Args:
182
+ document_ids: List of document IDs to delete
183
+ """
184
+ try:
185
+ self.client.delete(
186
+ collection_name=self.collection_name,
187
+ points_selector=qdrant_models.PointIdsList(
188
+ points=document_ids
189
+ ),
190
+ wait=True
191
+ )
192
+ self.logger.info(f"Successfully deleted {len(document_ids)} documents")
193
+ except Exception as e:
194
+ self.logger.error(f"Error deleting documents: {e}")
195
+ raise
196
+
197
+ def wipe_collection(self):
198
+ """Completely remove and recreate the collection for fresh start."""
199
+ try:
200
+ self.client.delete_collection(self.collection_name)
201
+ self._ensure_collection()
202
+ self.logger.info(f"Collection {self.collection_name} wiped and recreated")
203
+ except Exception as e:
204
+ self.logger.error(f"Error wiping collection: {e}")
205
+ raise
206
+
207
+ # Add this method to the QdrantRetriever class:
208
+
209
+ def get_collection_info(self) -> Dict:
210
+ """Get information about the collection."""
211
+ try:
212
+ collection = self.client.get_collection(self.collection_name)
213
+ return {
214
+ "name": self.collection_name,
215
+ "vectors_count": collection.vectors_count,
216
+ "points_count": collection.points_count,
217
+ "status": collection.status,
218
+ "config": {
219
+ "size": collection.config.params.vectors.size,
220
+ "distance": collection.config.params.vectors.distance
221
+ }
222
+ }
223
+ except Exception as e:
224
+ self.logger.error(f"Error getting collection info: {e}")
225
+ return {"error": str(e), "vectors_count": 0}
226
+
227
+ def get_collection_stats(self) -> Dict[str, Any]:
228
+ """
229
+ Retrieve statistics of the collection.
230
+ Returns:
231
+ Dictionary containing collection statistics.
232
+ """
233
+ try:
234
+ stats = self.client.get_collection(self.collection_name)
235
+ self.logger.info(f"Collection stats retrieved successfully: {stats}")
236
+ return stats.model_dump()
237
+ except Exception as e:
238
+ self.logger.error(f"Error getting collection stats: {e}")
239
+ raise
240
+
241
+
242
+ # Add these methods to the QdrantRetriever class:
243
+
244
+ def add_documents(self, documents: List[Document]) -> int:
245
+ """
246
+ Add documents to the vector store.
247
+
248
+ Args:
249
+ documents: List of documents with embeddings
250
+
251
+ Returns:
252
+ Number of documents added
253
+ """
254
+ if not documents:
255
+ return 0
256
+
257
+ points = []
258
+ for idx, doc in enumerate(documents):
259
+ if not hasattr(doc, 'metadata') or 'embedding' not in doc.metadata:
260
+ self.logger.warning(f"Document {idx} missing embedding, skipping")
261
+ continue
262
+
263
+ point_id = str(uuid.uuid4())
264
+ embedding = doc.metadata['embedding']
265
+
266
+ # Remove embedding from metadata before storing
267
+ metadata = {k: v for k, v in doc.metadata.items() if k != 'embedding'}
268
+ metadata['content'] = doc.page_content
269
+
270
+ points.append(
271
+ qdrant_models.PointStruct(
272
+ id=point_id,
273
+ vector=embedding,
274
+ payload=metadata
275
+ )
276
+ )
277
+
278
+ if points:
279
+ self.client.upsert(
280
+ collection_name=self.collection_name,
281
+ points=points
282
+ )
283
+ self.logger.info(f"Added {len(points)} documents to vector store")
284
+
285
+ return len(points)
286
+
287
+
288
+
289
+ def clear_collection(self):
290
+ """Clear all documents from the collection."""
291
+ try:
292
+ # Delete and recreate the collection
293
+ self.client.delete_collection(self.collection_name)
294
+ self._ensure_collection()
295
+ self.logger.info(f"Collection {self.collection_name} cleared successfully")
296
+ except Exception as e:
297
+ self.logger.error(f"Error clearing collection: {e}")
298
+ raise
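A self-contained round-trip sketch against the in-memory Qdrant client. The config shape mirrors the fields the class reads (`collection_name`, `embedding_dim`, `distance_metric`), but the concrete values and the 4-dimensional toy embedding are assumptions for illustration.

import numpy as np
from types import SimpleNamespace
from langchain_core.documents import Document

config = SimpleNamespace(rag=SimpleNamespace(
    collection_name="mental_health_docs",   # example name
    embedding_dim=4,                        # toy dimension; a real embedding model defines its own size
    distance_metric="Cosine",
))

store = QdrantRetriever(config)
store.add_documents([
    Document(page_content="Mindfulness practice can reduce anxiety symptoms.",
             metadata={"embedding": [0.1, 0.2, 0.3, 0.4], "source": "demo"}),
])

hits = store.retrieve(np.array([0.1, 0.2, 0.3, 0.4]), top_k=1)
print(hits[0].metadata["score"], hits[0].page_content)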
agents/shared_rag.py ADDED
@@ -0,0 +1,151 @@
1
+ import logging
2
+ import threading
3
+ import time
4
+ from sentence_transformers import SentenceTransformer
5
+ from langchain_google_genai import ChatGoogleGenerativeAI
6
+ from config.config import Config
7
+ from agents.rag_agent import MedicalRAG
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+ class LazyEmbeddingModel:
12
+ """Lazy loading wrapper for SentenceTransformer model"""
13
+ def __init__(self, model_name="all-MiniLM-L6-v2"):
14
+ self.model_name = model_name
15
+ self._model = None
16
+
17
+ def _ensure_loaded(self):
18
+ if self._model is None:
19
+ logger.info(f"📊 Loading embedding model: {self.model_name}...")
20
+ self._model = SentenceTransformer(self.model_name)
21
+ logger.info("✅ Embedding model loaded successfully")
22
+ return self._model
23
+
24
+ def encode(self, *args, **kwargs):
25
+ model = self._ensure_loaded()
26
+ return model.encode(*args, **kwargs)
27
+
28
+ def __getattr__(self, name):
29
+ # Delegate all other attributes to the actual model
30
+ model = self._ensure_loaded()
31
+ return getattr(model, name)
32
+
33
+ class SharedRAG:
34
+ """Singleton RAG instance for sharing across FastAPI and Flask backends."""
35
+
36
+ _instance = None
37
+ _lock = threading.Lock()
38
+ _initialized = False
39
+ _initialization_time = None
40
+
41
+ def __new__(cls):
42
+ if cls._instance is None:
43
+ with cls._lock:
44
+ if cls._instance is None:
45
+ cls._instance = super(SharedRAG, cls).__new__(cls)
46
+ return cls._instance
47
+
48
+ def __init__(self):
49
+ if not self._initialized:
50
+ current_time = time.time()
51
+ logger.info("🚀 Initializing shared RAG instance...")
52
+
53
+ try:
54
+ # Initialize config
55
+ self.config = Config()
56
+
57
+ # Initialize models (lazy loading for memory optimization)
58
+ logger.info("📊 Embedding model will be loaded when needed...")
59
+ self.embedding_model = LazyEmbeddingModel("all-MiniLM-L6-v2")
60
+
61
+ logger.info("🤖 Loading LLM...")
62
+ self.llm = ChatGoogleGenerativeAI(
63
+ model="gemini-2.0-flash",
64
+ temperature=0.1,
65
+ max_tokens=1024
66
+ )
67
+
68
+ # Initialize RAG (will get embedding model when needed)
69
+ logger.info("📚 Initializing MedicalRAG...")
70
+ # Pass the lazy embedding model
71
+ self.rag = MedicalRAG(self.config, self.llm, self.embedding_model)
72
+
73
+ # Ensure knowledge is ingested
74
+ self._ensure_knowledge_ingested()
75
+
76
+ SharedRAG._initialized = True
77
+ SharedRAG._initialization_time = current_time
78
+ logger.info(f"✅ Shared RAG instance ready in {time.time() - current_time:.2f}s")
79
+
80
+ except Exception as e:
81
+ logger.error(f"❌ Error initializing SharedRAG: {str(e)}")
82
+ raise e
83
+
84
+ def get_embedding_model(self):
85
+ """Get the embedding model (lazy loading wrapper)"""
86
+ return self.embedding_model
87
+
88
+ def _ensure_knowledge_ingested(self):
89
+ """Ensure knowledge base is populated."""
90
+ try:
91
+ # Check if collection has documents
92
+ collection_info = self.rag.retriever.get_collection_info()
93
+ existing_vectors = collection_info.get('points_count', 0)
94
+
95
+ logger.info(f"📋 Collection status: {existing_vectors} documents")
96
+
97
+ if existing_vectors == 0:
98
+ logger.info("📥 Empty collection, ingesting knowledge...")
99
+ result = self.rag.ingest_knowledge_folder("knowledge")
100
+ logger.info(f"✅ Ingestion complete: {result.get('successful', 0)} files processed")
101
+ else:
102
+ # Test retrieval with a simple query using lazy-loaded embedding model
103
+ logger.info("✅ Knowledge base ready (testing embedding on-demand)")
104
+ # The embedding model will load automatically when needed
105
+
106
+ except Exception as e:
107
+ logger.error(f"❌ Error checking/ingesting knowledge: {e}")
108
+ import traceback
109
+ logger.error(traceback.format_exc())
110
+
111
+ def get_rag(self):
112
+ """Get the RAG instance."""
113
+ current_time = time.time()
114
+
115
+ # Check if recently initialized (within 30 seconds)
116
+ if (self._initialized and
117
+ self._initialization_time and
118
+ (current_time - self._initialization_time) < 30):
119
+ logger.info(f"⚡ RAG ready ({current_time - self._initialization_time:.1f}s ago)")
120
+
121
+ return self.rag
122
+
123
+ def reingest_knowledge(self):
124
+ """Force re-ingestion of knowledge base."""
125
+ try:
126
+ logger.info("🔄 Force reingesting knowledge...")
127
+ self.rag.retriever.clear_collection()
128
+ result = self.rag.ingest_knowledge_folder("knowledge")
129
+ logger.info(f"✅ Reingestion complete: {result}")
130
+ return result
131
+ except Exception as e:
132
+ logger.error(f"❌ Error reingesting knowledge: {e}")
133
+ raise
134
+
135
+ def get_status(self):
136
+ """Get system status for debugging."""
137
+ try:
138
+ collection_info = self.rag.retriever.get_collection_info()
139
+ return {
140
+ "initialized": self._initialized,
141
+ "initialization_time": self._initialization_time,
142
+ "collection_points": collection_info.get('points_count', 0),
143
+ "crewai_enabled": getattr(self.rag, 'crewai_enabled', False),
144
+ "embedding_model": str(type(self.embedding_model)),
145
+ "llm_model": str(type(self.llm))
146
+ }
147
+ except Exception as e:
148
+ return {"error": str(e)}
149
+
150
+ # Create singleton instance
151
+ shared_rag_instance = SharedRAG()
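Because the module builds the singleton at import time, a backend only needs to import it. This sketch assumes GOOGLE_API_KEY is set and a knowledge/ folder is present next to the app.

from agents.shared_rag import shared_rag_instance

rag = shared_rag_instance.get_rag()        # same MedicalRAG object for every importer in this process
print(shared_rag_instance.get_status())    # e.g. {"initialized": True, "collection_points": ..., ...}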
agents/web_search_processor_agent/__init__.py ADDED
@@ -0,0 +1,25 @@
1
+ from typing import List, Dict, Any, Optional
2
+ from .web_search_processor import WebSearchProcessor
3
+
4
+ class WebSearchProcessorAgent:
5
+ """
6
+ Agent responsible for processing web search results with structured responses.
7
+ """
8
+
9
+ def __init__(self, config=None):
10
+ self.web_search_processor = WebSearchProcessor(config)
11
+
12
+ def process_web_search_results(self, query: str, chat_history: Optional[List[Dict[str, str]]] = None,
13
+ user_context: Optional[Dict] = None) -> str:
14
+ """
15
+ Process web search results and return structured response.
16
+
17
+ Args:
18
+ query: User query
19
+ chat_history: Previous conversation history
20
+ user_context: User context (emotion, mental health status, etc.)
21
+
22
+ Returns:
23
+ Structured empathy + solution + recommendations response
24
+ """
25
+ return self.web_search_processor.process_query(query, user_context)
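A hedged call-site sketch for the agent wrapper. WebSearchProcessor.process_query is defined in web_search_processor.py (not shown in this hunk), so the shape of the returned string and whether config=None is acceptable depend on that implementation.

agent = WebSearchProcessorAgent(config=None)
reply = agent.process_web_search_results(
    "coping strategies for exam stress",
    user_context={"emotion": "anxious", "mental_health_status": "Mild"},
)
print(reply)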
agents/web_search_processor_agent/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.87 kB). View file
 
agents/web_search_processor_agent/__pycache__/geminai_search.cpython-311.pyc ADDED
Binary file (7.94 kB). View file
 
agents/web_search_processor_agent/__pycache__/pubmed_search.cpython-311.pyc ADDED
Binary file (7.87 kB). View file
 
agents/web_search_processor_agent/__pycache__/tavily_search.cpython-311.pyc ADDED
Binary file (2.48 kB). View file
 
agents/web_search_processor_agent/__pycache__/web_search_agent.cpython-311.pyc ADDED
Binary file (6.62 kB). View file
 
agents/web_search_processor_agent/__pycache__/web_search_processor.cpython-311.pyc ADDED
Binary file (11.7 kB). View file
 
agents/web_search_processor_agent/geminai_search.py ADDED
@@ -0,0 +1,162 @@
1
+ import os
2
+ from typing import List, Dict, Optional
3
+ import google.generativeai as genai
4
+
5
+ class GeminiSearchAgent:
6
+ """
7
+ Searches for mental health information using Google's Gemini AI.
8
+ """
9
+ def __init__(self):
10
+ """Initialize the Gemini search agent."""
11
+ # Get API key from environment or use the one from your main.py
12
+ api_key = os.environ.get("GOOGLE_API_KEY", "AIzaSyDzBTzKt211XwMurywdk5HFCnFeeFxcRJ0")
13
+ genai.configure(api_key=api_key)
14
+
15
+ # Use gemini-1.5-flash which is currently available
16
+ self.model = genai.GenerativeModel('gemini-1.5-flash')
17
+
18
+ # List available models (for debugging)
19
+ try:
20
+ models = genai.list_models()
21
+ print("Available models:")
22
+ for model in models:
23
+ if 'generateContent' in model.supported_generation_methods:
24
+ print(f" - {model.name}")
25
+ except Exception:
26
+ pass
27
+
28
+ def search_mental_health(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
29
+ """
30
+ Search for mental health information using Gemini AI.
31
+
32
+ Args:
33
+ query: Search query string
34
+ max_results: Maximum number of results to return
35
+
36
+ Returns:
37
+ List of dictionaries containing search results
38
+ """
39
+ try:
40
+ # Enhance query for mental health context
41
+ enhanced_query = f"""
42
+ As a mental health information assistant, provide reliable information about: {query}
43
+
44
+ Focus on:
45
+ 1. Evidence-based mental health resources
46
+ 2. Professional medical sources and recent research
47
+ 3. Practical advice and coping strategies
48
+ 4. Treatment options and therapeutic approaches
49
+
50
+ Please provide specific, actionable information that would be helpful for someone seeking mental health support.
51
+ Format the response with clear sections if applicable.
52
+ """
53
+
54
+ # Generate content
55
+ response = self.model.generate_content(enhanced_query)
56
+
57
+ # Extract the response text
58
+ main_response = response.text if response.text else "No response generated"
59
+
60
+ # Format as a list of results
61
+ results = [{
62
+ "title": "Mental Health Information",
63
+ "content": main_response,
64
+ "source": "Google Gemini AI",
65
+ "type": "ai_summary"
66
+ }]
67
+
68
+ return results
69
+
70
+ except Exception as e:
71
+ print(f"Error details: {str(e)}")
72
+ return [{
73
+ "title": "Error",
74
+ "content": f"Error retrieving information: {str(e)}",
75
+ "source": "Error",
76
+ "type": "error"
77
+ }]
78
+
79
+ def search_specific_topics(self, query: str, topics: List[str]) -> Dict[str, str]:
80
+ """
81
+ Search for specific mental health topics.
82
+
83
+ Args:
84
+ query: Base query
85
+ topics: List of specific topics to include
86
+
87
+ Returns:
88
+ Dictionary with topic-specific information
89
+ """
90
+ results = {}
91
+
92
+ for topic in topics:
93
+ specific_query = f"""
94
+ Provide brief, evidence-based information about: {query} specifically regarding {topic}.
95
+ Keep the response concise but informative, focusing on practical advice and current best practices.
96
+ """
97
+ try:
98
+ response = self.model.generate_content(specific_query)
99
+ content = response.text if response.text else "No information available"
100
+ results[topic] = content
101
+
102
+ except Exception as e:
103
+ results[topic] = f"Error: {str(e)}"
104
+
105
+ return results
106
+
107
+
108
+ # For backward compatibility with existing code
109
+ class PubmedSearchAgent(GeminiSearchAgent):
110
+ """Alias for GeminiSearchAgent to maintain compatibility."""
111
+
112
+ def search_pubmed(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
113
+ """
114
+ Wrapper method to maintain compatibility with existing code.
115
+ Redirects to Gemini search.
116
+ """
117
+ # Get results from Gemini
118
+ gemini_results = self.search_mental_health(query, max_results)
119
+
120
+ # Transform to expected format
121
+ formatted_results = []
122
+ for i, result in enumerate(gemini_results):
123
+ formatted_results.append({
124
+ "title": result.get("title", "Information from Gemini AI"),
125
+ "abstract": result.get("content", "")[:500] + "..." if len(result.get("content", "")) > 500 else result.get("content", ""),
126
+ "url": "", # No direct URL with Gemini
127
+ "authors": "Google Gemini AI",
128
+ "publication_date": "Current",
129
+ "full_content": result.get("content", ""),
130
+ })
131
+
132
+ return formatted_results
133
+
134
+
135
+ # Test function
136
+ if __name__ == "__main__":
137
+ # Test Gemini search
138
+ agent = GeminiSearchAgent()
139
+
140
+ print("\nTesting Gemini Search Agent...")
141
+ print("-" * 60)
142
+
143
+ # Test query
144
+ query = "cognitive behavioral therapy for depression"
145
+ results = agent.search_mental_health(query)
146
+
147
+ for result in results:
148
+ print(f"\nTitle: {result['title']}")
149
+ print(f"Source: {result['source']}")
150
+ print(f"Content: {result['content'][:500]}...")
151
+
152
+ # Test specific topics
153
+ print("\n" + "=" * 60)
154
+ print("Testing specific topics...")
155
+ topics_results = agent.search_specific_topics(
156
+ "anxiety management",
157
+ ["breathing exercises", "medication options", "lifestyle changes"]
158
+ )
159
+
160
+ for topic, content in topics_results.items():
161
+ print(f"\n{topic.upper()}:")
162
+ print(content[:300] + "...")
agents/web_search_processor_agent/web_search_agent.py ADDED
@@ -0,0 +1,125 @@
1
+ from typing import Dict, List
2
+ import sys
3
+ import os
4
+
5
+ # Add the parent directory to the path for imports
6
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
7
+
8
+ # Import both Gemini and PubMed agents
9
+ try:
10
+ from .geminai_search import GeminiSearchAgent, PubmedSearchAgent
11
+ except ImportError:
12
+ from agents.web_search_processor_agent.geminai_search import GeminiSearchAgent, PubmedSearchAgent
13
+
14
+ class WebSearchAgent:
15
+ """
16
+ Agent responsible for retrieving real-time medical information from web sources.
17
+ Uses Gemini AI for general queries and maintains PubMed compatibility.
18
+ """
19
+
20
+ def __init__(self, config=None):
21
+ """Initialize the web search agent."""
22
+ # Initialize both search agents
23
+ self.gemini_agent = GeminiSearchAgent()
24
+ self.pubmed_search_agent = PubmedSearchAgent() # This is actually GeminiSearchAgent too
25
+ self.config = config
26
+
27
+ def search(self, query: str) -> str:
28
+ """
29
+ Perform searches using Gemini AI.
30
+
31
+ Args:
32
+ query: Search query string
33
+
34
+ Returns:
35
+ Formatted search results
36
+ """
37
+ print(f"[WebSearchAgent] Searching with Gemini for: {query}")
38
+
39
+ try:
40
+ # Use Gemini search for better results
41
+ results = self.gemini_agent.search_mental_health(query)
42
+
43
+ # Format the results
44
+ formatted_results = self._format_gemini_results(results)
45
+
46
+ return formatted_results
47
+
48
+ except Exception as e:
49
+ print(f"[WebSearchAgent] Error during search: {str(e)}")
50
+ # Fallback to basic response
51
+ return self._get_fallback_response(query)
52
+
53
+ def _format_gemini_results(self, results: List[Dict[str, str]]) -> str:
54
+ """Format Gemini results for display."""
55
+ if not results:
56
+ return "No relevant information found."
57
+
58
+ # Check for error results
59
+ if results[0].get("type") == "error":
60
+ return results[0].get("content", "Error occurred during search")
61
+
62
+ formatted_output = []
63
+
64
+ for result in results:
65
+ if result.get("type") == "ai_summary":
66
+ # Format AI-generated content
67
+ content = result.get("content", "")
68
+ formatted_output.append(content)
69
+ else:
70
+ # Format other types of results
71
+ title = result.get("title", "Information")
72
+ content = result.get("content", "")
73
+ source = result.get("source", "Unknown")
74
+
75
+ formatted_output.append(f"**{title}**\n*Source: {source}*\n\n{content}")
76
+
77
+ return "\n\n".join(formatted_output)
78
+
79
+ def _get_fallback_response(self, query: str) -> str:
80
+ """Provide a fallback response when search fails."""
81
+ return f"""I apologize, but I couldn't retrieve specific information about "{query}" at this moment.
82
+
83
+ Here are some general suggestions:
84
+ 1. Consider consulting with a mental health professional
85
+ 2. Visit reputable mental health websites like NIMH or WHO
86
+ 3. Speak with your healthcare provider for personalized advice
87
+
88
+ Would you like to rephrase your question or ask about something else?"""
89
+
90
+ def search_mental_health_specific(self, query: str) -> str:
91
+ """
92
+ Search specifically for mental health related content.
93
+
94
+ Args:
95
+ query: Search query string
96
+
97
+ Returns:
98
+ Formatted search results with mental health focus
99
+ """
100
+ # The GeminiSearchAgent already adds mental health context
101
+ return self.search(query)
102
+
103
+ def search_with_topics(self, query: str, topics: List[str]) -> str:
104
+ """
105
+ Search for specific topics using Gemini.
106
+
107
+ Args:
108
+ query: Base query
109
+ topics: List of specific topics
110
+
111
+ Returns:
112
+ Formatted results by topic
113
+ """
114
+ try:
115
+ results = self.gemini_agent.search_specific_topics(query, topics)
116
+
117
+ formatted = [f"**Information about {query}:**\n"]
118
+ for topic, content in results.items():
119
+ formatted.append(f"\n### {topic.title()}\n{content}")
120
+
121
+ return "\n".join(formatted)
122
+
123
+ except Exception as e:
124
+ print(f"[WebSearchAgent] Error in topic search: {str(e)}")
125
+ return self._get_fallback_response(query)
agents/web_search_processor_agent/web_search_processor.py ADDED
@@ -0,0 +1,116 @@
1
+ import os
2
+ from typing import Dict, List, Optional
3
+
4
+ try:
5
+ from .web_search_agent import WebSearchAgent
6
+ except ImportError:
7
+ from agents.web_search_processor_agent.web_search_agent import WebSearchAgent
8
+
9
+ class WebSearchProcessor:
10
+ """
11
+ Processes web search results using structured empathy + solution + recommendations format.
12
+ """
13
+
14
+ def __init__(self, config=None):
15
+ self.config = config
16
+ try:
17
+ self.web_search_agent = WebSearchAgent()
18
+ except Exception as e:
19
+ print(f"⚠️ Web search agent not available: {e}")
20
+ self.web_search_agent = None
21
+
22
+ def process_query(self, query: str, user_context: Optional[Dict] = None) -> str:
23
+ """
24
+ Process query with structured empathy + solution + recommendations format.
25
+
26
+ Args:
27
+ query: User query
28
+ user_context: User context including emotion, mental_health_status, name
29
+
30
+ Returns:
31
+ Structured response string
32
+ """
33
+ try:
34
+ query_lower = query.lower()
35
+ user_context = user_context or {}
36
+
37
+ emotion = user_context.get("emotion", "neutral")
38
+ mental_health_status = user_context.get("mental_health_status", "Unknown")
39
+ user_name = user_context.get("name", "there")
40
+
41
+ # Generate structured response based on query content
42
+ if any(word in query_lower for word in ["depressed", "depression", "sad"]):
43
+ empathy = f"I can hear that you're going through a really difficult time with sadness and depression, {user_name}. Those feelings can be overwhelming and exhausting, and I want you to know that what you're experiencing is completely valid."
44
+ solution = "Depression often involves changes in brain chemistry that affect mood, energy, and motivation. Professional treatment through therapy and/or medication has proven very effective for most people experiencing depression."
45
+ recommendations = "I strongly recommend reaching out to a mental health professional who can provide proper assessment and treatment. In the meantime, try to maintain basic self-care routines, connect with supportive people in your life, and consider contacting the National Mental Health Program at 1717 if you need immediate support."
46
+
47
+ elif any(word in query_lower for word in ["anxious", "anxiety", "worried", "panic"]):
48
+ empathy = f"I understand that anxiety can feel incredibly overwhelming and scary, {user_name}. Those racing thoughts and physical sensations are very real and can be exhausting to deal with."
49
+ solution = "Anxiety is one of the most treatable mental health conditions. Techniques like deep breathing, grounding exercises, and cognitive behavioral therapy have strong research support for managing anxiety symptoms."
50
+ recommendations = "Try practicing 4-7-8 breathing (inhale 4, hold 7, exhale 8), limit caffeine intake, and consider speaking with a counselor who specializes in anxiety treatment. Regular exercise and mindfulness can also significantly help."
51
+
52
+ elif any(word in query_lower for word in ["stress", "overwhelm", "pressure"]):
53
+ empathy = f"It sounds like you're dealing with a lot of stress right now, {user_name}. That pressure can really take a toll on your mental and physical well-being."
54
+ solution = "Stress management involves both addressing the source of stress and building your capacity to handle pressure. Identifying specific stressors and developing coping strategies can make a significant difference."
55
+ recommendations = "Try breaking down overwhelming tasks into smaller steps, practice saying no to additional commitments, and schedule regular breaks. Consider stress-reduction techniques like meditation, exercise, or talking to a therapist."
56
+
57
+ elif any(word in query_lower for word in ["lonely", "alone", "isolated"]):
58
+ empathy = f"I hear that you're feeling lonely, {user_name}. Loneliness can be very difficult to experience, and you're showing strength by reaching out."
59
+ solution = "Loneliness is a common human experience that can be addressed through building meaningful connections and developing a support network."
60
+ recommendations = "Consider joining community groups, volunteering, or participating in activities you enjoy. Online support groups can also provide connection. If loneliness persists, talking to a counselor can help develop strategies for building relationships."
61
+
62
+ elif any(word in query_lower for word in ["sleep", "tired", "exhausted", "insomnia"]):
63
+ empathy = f"It sounds like you're having difficulties with sleep or feeling tired, {user_name}. Sleep issues can significantly impact mental health and daily functioning."
64
+ solution = "Sleep problems often have both physical and mental health components. Good sleep hygiene and addressing underlying stress or anxiety can improve sleep quality."
65
+ recommendations = "Try maintaining a consistent sleep schedule, limiting screen time before bed, and creating a relaxing bedtime routine. Avoid caffeine late in the day. If sleep problems persist, consider consulting a healthcare provider."
66
+
67
+ else:
68
+ # Generic structured response
69
+ empathy = f"Thank you for reaching out and sharing what you're going through, {user_name}. I can sense that you're dealing with some challenges right now, and I want you to know that your experience matters."
70
+ solution = "There are evidence-based strategies and resources available that can help you manage these feelings and improve your mental health over time."
71
+ recommendations = "Consider speaking with a mental health professional for personalized support, practice daily self-care activities, and don't hesitate to reach out to trusted friends or family members for additional support."
72
+
73
+ # Combine the structured response
74
+ structured_response = f"{empathy}\n\n{solution}\n\n{recommendations}"
75
+
76
+ # Add crisis resources for severe cases
77
+ if (mental_health_status in ["Severe", "Crisis"] or
78
+ any(word in query_lower for word in ["suicide", "kill myself", "want to die", "hurt myself"])):
79
+ crisis_addition = "\n\n**🆘 Immediate Support Available:** If you're having thoughts of self-harm, please contact the National Mental Health Program at 1717 (24/7) or Emergency Services at 112 immediately."
80
+ structured_response += crisis_addition
81
+
82
+ return structured_response
83
+
84
+ except Exception as e:
85
+ print(f"[WebSearchProcessor] Error: {e}")
86
+ return f"I'm here to support you, {user_context.get('name', 'there')}, though I'm having some technical difficulties. Please know that whatever you're going through, there are people and resources available to help. For immediate support in Bhutan, contact the National Mental Health Program at 1717."
87
+
88
+ def process_web_results(self, query: str, chat_history: Optional[List[Dict[str, str]]] = None) -> str:
89
+ """
90
+ Legacy method for compatibility.
91
+ """
92
+ # Build context from chat history
93
+ user_context = {}
94
+ if chat_history and len(chat_history) > 0:
95
+ # Extract user context from recent messages
96
+ recent_messages = chat_history[-3:] if len(chat_history) > 3 else chat_history
97
+ for msg in recent_messages:
98
+ if msg.get("role") == "user":
99
+ content = msg.get('content', '').lower()
100
+ # Simple emotion detection
101
+ if any(word in content for word in ['sad', 'depressed', 'down']):
102
+ user_context['emotion'] = 'sad'
103
+ elif any(word in content for word in ['anxious', 'worried', 'panic']):
104
+ user_context['emotion'] = 'anxious'
105
+ elif any(word in content for word in ['angry', 'frustrated', 'mad']):
106
+ user_context['emotion'] = 'angry'
107
+
108
+ return self.process_query(query, user_context)
109
+
110
+ # Convenience function for direct use
111
+ def search_mental_health_info(query: str, user_context: Optional[Dict] = None) -> str:
112
+ """
113
+ Direct function to search for mental health information.
114
+ """
115
+ processor = WebSearchProcessor()
116
+ return processor.process_query(query, user_context)
chat_sessions/chat_Harsh Narayan_20250802_165208.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "session_id": "20250802_165208",
3
+ "user_name": "Harsh Narayan",
4
+ "messages": [
5
+ {
6
+ "content": "Now I'm upset.",
7
+ "role": "user",
8
+ "timestamp": "2025-08-02T16:52:08.426564",
9
+ "topics": []
10
+ },
11
+ {
12
+ "agent": "RAG_AGENT",
13
+ "content": "I understand you're feeling upset right now. It's completely valid to experience a range of emotions, and I'm here to listen without judgment. It takes courage to acknowledge when you're feeling this way.\n\nResearch shows that sometimes upsetting feelings can arise from internal thoughts and judgments, as one source mentions getting angry and critical with oneself. It can help to practice simply listening to the voice of your thoughts without immediately judging them or trying to shut them down. This can create some space between you and the upsetting feelings.\n\nI recommend considering what might have triggered this feeling and if you can identify any specific thoughts or situations connected to it. If these feelings persist or become overwhelming, I suggest reaching out to a mental health professional for further support. Remember, taking care of your mental well-being is essential.\n",
14
+ "role": "assistant",
15
+ "timestamp": "2025-08-02T16:52:12.085676",
16
+ "topics": [
17
+ "depression"
18
+ ]
19
+ }
20
+ ],
21
+ "metadata": {
22
+ "topics": []
23
+ }
24
+ }
chat_sessions/chat_Harsh Narayan_20250804_163618.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "session_id": "20250804_163618",
3
+ "user_name": "Harsh Narayan",
4
+ "messages": [
5
+ {
6
+ "content": "I am failing sad, what should I do?",
7
+ "role": "user",
8
+ "timestamp": "2025-08-04T16:36:18.921585",
9
+ "topics": [
10
+ "depression"
11
+ ]
12
+ },
13
+ {
14
+ "agent": "RAG_AGENT",
15
+ "content": "I understand you're feeling sad about failing, and that's a really tough feeling to experience. It's completely valid to feel down when things don't go as planned. I'm here to offer support.\n\nRemembering the opposite of your negative feelings can help; if things feel unpleasant, try to focus on peace and relaxation, letting go of the need to do anything. Research shows that being mindful of when you get carried away by negative thoughts and stopping yourself can also be beneficial. Living each day with the intention of bringing joy and kindness can also help shift your perspective.\n\nI recommend considering practicing these techniques when you feel overwhelmed. Since I don't know your full situation, I suggest reaching out to a mental health professional for personalized support and guidance. They can help you explore these feelings further and develop coping strategies.\n",
16
+ "role": "assistant",
17
+ "timestamp": "2025-08-04T16:36:21.540544",
18
+ "topics": [
19
+ "depression",
20
+ "coping"
21
+ ]
22
+ }
23
+ ],
24
+ "metadata": {
25
+ "topics": []
26
+ }
27
+ }
chat_sessions/chat_string_string.json ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "session_id": "string",
3
+ "user_name": "string",
4
+ "messages": [
5
+ {}
6
+ ],
7
+ "metadata": {}
8
+ }
config/__init__.py ADDED
File without changes
config/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (186 Bytes). View file
 
config/__pycache__/config.cpython-311.pyc ADDED
Binary file (8.95 kB). View file
 
config/agents.yaml ADDED
@@ -0,0 +1,42 @@
1
+ emotion_detector:
2
+ role: Emotion Detector
3
+ goal: Analyze user input to determine their emotional state.
4
+ backstory: You are an empathetic AI skilled at identifying emotions like sadness, anxiety, or happiness from text.
5
+ memory: true
6
+ verbose: false
7
+
8
+ suggestion_agent:
9
+ role: Suggestion Provider
10
+ goal: Offer 1-2 brief and actionable suggestions to support the user's mental well-being.
11
+ backstory: You are a compassionate advisor who shares short mental health tips like breathing exercises, journaling, but also offers to gently guide the user through them if they seem open to it.
12
+ memory: true
13
+ verbose: false
14
+
15
+ safety_agent:
16
+ role: Safety Monitor
17
+ goal: Check user input for signs of crisis or self-harm and respond appropriately.
18
+ backstory: You are a vigilant guardian trained to detect red flags like mentions of self-harm, prioritizing user safety.
19
+ memory: true
20
+ verbose: false
21
+
22
+ orchestrator:
23
+ role: Workflow Orchestrator
24
+ goal: Coordinate agent tasks to ensure a cohesive and empathetic response, prioritizing safety if needed.
25
+ backstory: You are a strategic coordinator ensuring the chatbot responds with clarity and empathy, tailoring response length naturally.
26
+ memory: true
27
+ verbose: false
28
+
29
+ rag_retriever:
30
+ role: Knowledge Retriever
31
+ goal: Retrieve relevant mental health tips from the text corpus based on the user's emotional state.
32
+ backstory: You are a librarian who finds concise, relevant resources using a custom RAG pipeline.
33
+ memory: true
34
+ verbose: false
35
+
36
+ rag_reader:
37
+ role: Knowledge Summarizer
38
+ goal: Summarize retrieved tips clearly and briefly for the user's emotional state.
39
+ backstory: You are an editor who distills helpful advice into brief, clear summaries.
40
+ memory: true
41
+ verbose: false
42
+
config/config.py ADDED
@@ -0,0 +1,164 @@
1
+ """
2
+ Configuration loader for the Mental Health Chatbot
3
+ """
4
+
5
+ import os
6
+ import yaml
7
+ from dataclasses import dataclass
8
+ from typing import Any, Dict, Optional
9
+ from pathlib import Path
10
+
11
+ from langchain_google_genai import ChatGoogleGenerativeAI
12
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
13
+ from sentence_transformers import SentenceTransformer
14
+
15
+
16
+ @dataclass
17
+ class RAGConfig:
18
+ """Configuration for RAG agent"""
19
+ def __init__(self, config_dict: Dict[str, Any]):
20
+ self.config_dict = config_dict
21
+ self.llm = ChatGoogleGenerativeAI(
22
+ model="gemini-1.5-flash",
23
+ temperature=0.3,
24
+ google_api_key=os.environ.get("GOOGLE_API_KEY")
25
+ )
26
+
27
+ # Load from YAML if available
28
+ if 'rag' in config_dict:
29
+ rag_config = config_dict['rag']
30
+ self.embedding_dim = rag_config.get('embedding_dim', 384)
31
+
32
+ # Lazy load SentenceTransformer for memory optimization
33
+ self.embedding_model = None
34
+ self.embedding_model_name = "all-MiniLM-L6-v2"
35
+
36
+ self.collection_name = rag_config.get('collection_name', 'mental_health_docs')
37
+ self.chunk_size = rag_config.get('chunk_size', 256)
38
+ self.chunk_overlap = rag_config.get('chunk_overlap', 32)
39
+ self.reranker_model = rag_config.get('reranker_model', 'cross-encoder/ms-marco-MiniLM-L-6-v2')
40
+ self.reranker_top_k = rag_config.get('reranker_top_k', 5)
41
+ self.max_context_length = rag_config.get('max_context_length', 2048)
42
+ self.include_sources = rag_config.get('include_sources', True)
43
+ self.use_local = rag_config.get('use_local', True)
44
+ self.url = rag_config.get('url', 'http://localhost:6333')
45
+ self.distance_metric = rag_config.get('distance_metric', 'Cosine') # Changed to 'Cosine'
46
+ self.min_retrieval_confidence = rag_config.get('min_retrieval_confidence', 0.85)
47
+
48
+ # Add missing attributes
49
+ self.processed_docs_dir = rag_config.get('processed_docs_dir', 'processed_docs')
50
+ self.knowledge_dir = rag_config.get('knowledge_dir', 'knowledge')
51
+ else:
52
+ # Default values if no YAML config
53
+ self.embedding_dim = 384
54
+ self.embedding_model = None
55
+ self.embedding_model_name = "all-MiniLM-L6-v2"
56
+ self.collection_name = 'mental_health_docs'
57
+ self.chunk_size = 256
58
+ self.chunk_overlap = 32
59
+ self.reranker_model = 'cross-encoder/ms-marco-MiniLM-L-6-v2'
60
+ self.reranker_top_k = 5
61
+ self.max_context_length = 2048
62
+ self.include_sources = True
63
+ self.use_local = True
64
+ self.url = 'http://localhost:6333'
65
+ self.distance_metric = 'Cosine' # Changed to 'Cosine'
66
+ self.min_retrieval_confidence = 0.85
67
+ self.processed_docs_dir = 'processed_docs'
68
+ self.knowledge_dir = 'knowledge'
69
+
70
+ self.context_limit = 4
71
+
72
+ def get_embedding_model(self):
73
+ """Lazy load the embedding model only when needed"""
74
+ if self.embedding_model is None:
75
+ try:
76
+ from sentence_transformers import SentenceTransformer
77
+ self.embedding_model = SentenceTransformer(self.embedding_model_name)
78
+ print(f"✅ Embedding model loaded: {self.embedding_model_name}")
79
+ except Exception as e:
80
+ print(f"⚠️ Failed to load embedding model: {e}")
81
+ return None
82
+ return self.embedding_model
83
+
84
+
85
+ @dataclass
86
+ class ConversationConfig:
87
+ """Configuration for Conversation agent"""
88
+ def __init__(self):
89
+ self.llm = ChatGoogleGenerativeAI(
90
+ model="gemini-1.5-flash",
91
+ temperature=0.7,
92
+ google_api_key=os.environ.get("GOOGLE_API_KEY")
93
+ )
94
+
95
+
96
+ @dataclass
97
+ class WebSearchConfig:
98
+ """Configuration for Web Search agent"""
99
+ def __init__(self):
100
+ self.context_limit = 4
101
+ self.llm = ChatGoogleGenerativeAI(
102
+ model="gemini-1.5-flash",
103
+ temperature=0.5,
104
+ google_api_key=os.environ.get("GOOGLE_API_KEY")
105
+ )
106
+ # Add Tavily API key configuration
107
+ self.tavily_api_key = os.environ.get("TAVILY_API_KEY", "tvly-your-api-key-here")
108
+
109
+
110
+ @dataclass
111
+ class AgentDecisionConfig:
112
+ """Configuration for Agent Decision system"""
113
+ def __init__(self):
114
+ self.llm = ChatGoogleGenerativeAI(
115
+ model="gemini-2.0-flash",
116
+ temperature=0,
117
+ google_api_key=os.environ.get("GOOGLE_API_KEY")
118
+ )
119
+
120
+
121
+ class Config:
122
+ """Main configuration class that loads from YAML files"""
123
+
124
+ def __init__(self):
125
+ # Set API keys
126
+ os.environ["GOOGLE_API_KEY"] = "AIzaSyDzBTzKt211XwMurywdk5HFCnFeeFxcRJ0"
127
+ os.environ["TAVILY_API_KEY"] = "tvly-your-api-key-here" # You need to replace this
128
+
129
+ # Load YAML configurations
130
+ self.config_dict = self._load_yaml_configs()
131
+
132
+ # Initialize configurations
133
+ self.rag = RAGConfig(self.config_dict)
134
+ self.conversation = ConversationConfig()
135
+ self.web_search = WebSearchConfig()
136
+ self.agent_decision = AgentDecisionConfig()
137
+
138
+ # General settings
139
+ self.max_conversation_history = 20
140
+
141
+ def _load_yaml_configs(self) -> Dict[str, Any]:
142
+ """Load all YAML configuration files"""
143
+ config_dict = {}
144
+ config_dir = Path(__file__).parent
145
+
146
+ # Load each YAML file
147
+ yaml_files = ['agents.yaml', 'rag.yaml', 'tasks.yaml']
148
+ for yaml_file in yaml_files:
149
+ file_path = config_dir / yaml_file
150
+ if file_path.exists():
151
+ with open(file_path, 'r') as f:
152
+ data = yaml.safe_load(f)
153
+ if data:
154
+ config_dict.update(data)
155
+
156
+ return config_dict
157
+
158
+ def get_agent_config(self, agent_name: str) -> Dict[str, Any]:
159
+ """Get configuration for a specific agent"""
160
+ return self.config_dict.get(agent_name, {})
161
+
162
+ def get_task_config(self, task_name: str) -> Dict[str, Any]:
163
+ """Get configuration for a specific task"""
164
+ return self.config_dict.get(task_name, {})
config/rag.yaml ADDED
@@ -0,0 +1,26 @@
1
+ rag:
2
+ collection_name: "mental_health_docs"
3
+ embedding_model: "all-MiniLM-L6-v2"
4
+ embedding_dim: 384
5
+ chunk_size: 256
6
+ chunk_overlap: 32
7
+ processed_docs_dir: "processed_docs"
8
+ knowledge_dir: "knowledge"
9
+
10
+ # Qdrant settings
11
+ use_local: true
12
+ url: "http://localhost:6333"
13
+ distance_metric: "Cosine"
14
+
15
+ # Optimized reranker settings for speed
16
+ reranker_model: "cross-encoder/ms-marco-MiniLM-L-6-v2"
17
+ reranker_top_k: 3 # Reduced from 5 to 3 for faster processing
18
+
19
+ # Response generation
20
+ max_context_length: 1024 # Reduced from 2048 for faster processing
21
+ response_format_instructions: ""
22
+ include_sources: true
23
+ min_retrieval_confidence: 0.5 # Increased from 0.4 for better quality/speed trade-off
24
+ top_k: 3 # Reduced from 5 for faster retrieval
25
+ max_context_docs: 3 # Reduced for faster processing
26
+
config/tasks.yaml ADDED
@@ -0,0 +1,79 @@
1
+ emotion_detection_task:
2
+ description: >
3
+ Analyze the user's input: "{user_input}"
4
+ Identify their primary emotion (e.g., sadness, anxiety, neutral) and provide a short emotional summary.
5
+ expected_output: >
6
+ A short emotion label, e.g., "User feels anxious."
7
+ output_key: emotion_summary
8
+ agent: emotion_detector
9
+
10
+ safety_check_task:
11
+ description: >
12
+ Review the user's message and conversation context.
13
+ Determine whether it contains signs of emotional distress or crisis using your mental health knowledge base.
14
+ Respond in JSON format:
15
+ {
16
+ "distress": true/false,
17
+ "trigger": "quoted text from user input if applicable"
18
+ }
19
+ expected_output: >
20
+ Structured JSON with distress flag and trigger phrase if any.
21
+ output_key: safety_result
22
+ agent: safety_agent
23
+ context:
24
+ - emotion_detection_task
25
+
26
+ rag_retrieval_task:
27
+ description: >
28
+ Based on the user's emotional state, retrieve concise mental health tips from the corpus.
29
+ expected_output: >
30
+ 1–2 short tips, e.g., "Take a walk in nature. Try deep breathing exercises."
31
+ output_key: retrieved_tips
32
+ agent: rag_retriever
33
+ context:
34
+ - emotion_detection_task
35
+
36
+ rag_reading_task:
37
+ description: >
38
+ Summarize the tips retrieved in a helpful and brief tone.
39
+ expected_output: >
40
+ A 1-2 line practical summary of the tips.
41
+ output_key: summarized_tips
42
+ agent: rag_reader
43
+ context:
44
+ - emotion_detection_task
45
+ - rag_retrieval_task
46
+
47
+ suggestion_task:
48
+ description: >
49
+ Give 1-2 practical, empathetic suggestions based on emotional state and retrieved tips.
50
+ If appropriate, gently offer further help such as:
51
+ "Would you like me to walk you through a breathing exercise?" or
52
+ "Shall I guide you with some mindfulness steps?"
53
+ expected_output: >
54
+ 1-2 short actionable suggestions (1-2 lines), optionally followed by a gentle offer for guided support.
55
+ E.g., "Try a short breathing exercise. Take a break from screens. Would you like me to walk you through one?"
56
+ output_key: suggestions
57
+ agent: suggestion_agent
58
+ context:
59
+ - emotion_detection_task
60
+ - safety_check_task
61
+ - rag_retrieval_task
62
+ - rag_reading_task
63
+
64
+ orchestrate_task:
65
+ description: >
66
+ Respond to: "{user_input}" with an adaptive tone and length.
67
+ - For greetings or neutral input: be short and friendly.
68
+ - For moderate emotions (e.g., anxiety, stress): respond with 5-6 concise lines.
69
+ - For crisis-level input: give a longer, supportive message with clear help suggestions.
70
+ expected_output: >
71
+ A concise, human-like response adapted to the emotional tone and urgency.
72
+ output_key: final_response
73
+ agent: orchestrator
74
+ context:
75
+ - emotion_detection_task
76
+ - safety_check_task
77
+ - rag_retrieval_task
78
+ - rag_reading_task
79
+ - suggestion_task
crew_ai/README.md ADDED
@@ -0,0 +1,164 @@
1
+ # DrukCare
2
+ A skeleton of CrewAI agents providing mental health assistance.
3
+ ## Tagline: Empowering Mental Well-being with Intelligent and Culturally Sensitive Support.
4
+
5
+ # 1. About
6
+ DrukCare AI is an intelligent chatbot application designed to provide empathetic and personalized mental health assistance, specifically tailored for the context of Bhutan. Leveraging the CrewAI framework, this system orchestrates a team of specialized AI agents to guide users through various stages of support, from crisis detection and profile collection to dynamic mental health assessments and personalized recommendations.
7
+
8
+ The project aims to offer accessible, initial mental health guidance, respecting user privacy and cultural nuances, while adhering to ethical guidelines.
9
+
10
+ # 2. Features
11
+
12
+ 1. Crisis Detection: Identifies suicidal ideation or mental health emergencies.
13
+ 2. Condition Classification: Detects depression, anxiety, substance use, or general mental health concerns.
14
+ 3. Interactive Assessments: Conducts PHQ-9, GAD-7, and DAST-10 questionnaires, and includes questionnaires for alcohol use and bipolar disorder.
15
+ 4. Personalized Recommendations: Offers suggestions such as helplines, therapy options, mindfulness, and meditation practices (including Buddhist meditation).
16
+ 5. RAG-Driven Retrieval: Retrieves relevant documents using a vector store (PostgreSQL pgvector) hosted on Supabase.
17
+ 6. User Profile Integration: Adapts recommendations using user history and preferences.
18
+
19
+ # 3. Workflow
20
+
21
+ The DrukCare AI operates as a sequential CrewAI process, ensuring a structured and coherent user interaction flow:
22
+
23
+ 1. User inputs a query.
24
+ 2. Crisis detection checks for emergencies.
25
+ 3. User profile is retrieved or mocked.
26
+ 4. Condition classifier suggests a condition and assessment.
27
+ 5. User confirms and takes the questionnaire.
28
+ 6. Score is calculated and interpreted.
29
+ 7. Final recommendation is generated (optionally enhanced via RAG).
30
+
31
+ ![Flow diagram](data/flow_chart.png "Flow diagram")
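+ 
+ The sketch below is illustrative only (the agent and task wording here is not taken from the project code); it shows how the sequential crew described above can be assembled with CrewAI, with the remaining agents and tasks appended in the same way:
+ 
+     from crewai import Agent, Task, Crew, Process
+ 
+     crisis_detector = Agent(
+         role="Crisis Detection Specialist",
+         goal="Identify suicidal ideation or mental health emergencies in the user query.",
+         backstory="A vigilant guardian trained to detect red flags and prioritize user safety.",
+     )
+ 
+     crisis_task = Task(
+         description="Analyze the user's query: {user_query} and flag any crisis signals.",
+         expected_output="'crisis' or 'no-crisis' with a short justification.",
+         agent=crisis_detector,
+     )
+ 
+     crew = Crew(
+         agents=[crisis_detector],    # profile, classifier, RAG and recommendation agents are added here
+         tasks=[crisis_task],         # followed by their tasks, in the order of the workflow above
+         process=Process.sequential,  # run the tasks strictly in the listed order
+     )
+ 
+     result = crew.kickoff(inputs={"user_query": "I have been feeling anxious lately."})
+     print(result)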
32
+
33
+ ## Crisis Detection:
34
+
35
+ Input: User's initial query.
36
+
37
+ Action: The Crisis Detection Specialist agent analyzes the input for emergency signs, using a trained model from Hugging Face as a tool to classify the query as crisis or no-crisis.
38
+
39
+ Output: Crisis or No-crisis
40
+
41
+ ## User Profile Collection:
42
+
43
+ Input: User's query and status from Crisis Detection.
44
+
45
+ Action: Queries the user profile from the PostgreSQL database.
46
+
47
+ Output: A structured user profile in JSON format.
48
+
49
+ ## Knowledge Retrieval & Query Refinement (RAG):
50
+
51
+ Input: User's initial query and collected user profile.
52
+
53
+ Action: The Knowledge Base Manager & Query Refiner agent interprets the user's intent, formulating specific keywords for the vector database. It then retrieves relevant mental health information and identifies a potential condition (e.g., 'depression', 'anxiety').
54
+
55
+ Output: Relevant mental health recommendations and the identified condition.
56
+
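+ A minimal sketch of this retrieval step is shown below; the table and column names ("mental_health_docs", "content", "embedding") are assumptions for illustration, not the project's actual schema:
+ 
+     import os
+     import psycopg2
+     from sentence_transformers import SentenceTransformer
+ 
+     model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+     query_vector = model.encode("coping strategies for anxiety").tolist()
+     vector_literal = "[" + ",".join(str(x) for x in query_vector) + "]"
+ 
+     # Cosine-distance search with pgvector's <=> operator on the Supabase-hosted database.
+     with psycopg2.connect(os.environ["SUPABASE_DB_URI"]) as conn:
+         with conn.cursor() as cur:
+             cur.execute(
+                 "SELECT content FROM mental_health_docs "
+                 "ORDER BY embedding <=> %s::vector LIMIT 5",
+                 (vector_literal,),
+             )
+             documents = [row[0] for row in cur.fetchall()]
+ 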
57
+ ## Conditional Assessment:
58
+
59
+ Input: Identified condition from mental condition classifier crew.
60
+
61
+ Action: The Mental Health Assessment Specialist agent determines if an assessment is relevant (e.g., PHQ-9 for depression, GAD-7 for anxiety). If relevant, it seeks explicit user consent. If consent is given, it administers the questionnaire step-by-step.
62
+
63
+ Output: Assessment status (completed, skipped, denied) and results (score, interpretation) if completed.
64
+
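+ For reference, scoring the PHQ-9 is a simple sum of the nine item scores (each 0-3), interpreted against the standard severity bands; a small sketch:
+ 
+     def interpret_phq9(answers):
+         """answers: nine PHQ-9 item scores, each 0-3."""
+         score = sum(answers)
+         if score <= 4:
+             severity = "minimal"
+         elif score <= 9:
+             severity = "mild"
+         elif score <= 14:
+             severity = "moderate"
+         elif score <= 19:
+             severity = "moderately severe"
+         else:
+             severity = "severe"
+         return score, severity
+ 
+     print(interpret_phq9([1, 2, 1, 0, 3, 1, 2, 0, 1]))  # (11, 'moderate')
+ 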
65
+ ## Personalized Recommendation:
66
+
67
+ Input: Original user query, collected user profile, RAG results, and assessment results.
68
+
69
+ Action: The Personalized Recommendation Engine synthesizes all gathered information to generate highly personalized, empathetic, and actionable mental health recommendations, culturally adapted for Bhutan.
70
+
71
+ Output: The final comprehensive recommendation to the user.
72
+
73
+ # 4. Architecture/Components
74
+ The application is built using the CrewAI framework, comprising Agents, Tasks, and Tools.
75
+
76
+ ## 4.1. Agents
77
+
78
+ 1. **CrisisDetectionAgent:** Detects urgent crisis signals in user queries.
79
+ 2. **Mental Condition Classifier Agent:** Identifies the likely mental condition.
80
+ 3. **Data Retriever Agent:** Fetches user demographic and mental health background.
81
+ 4. **RecommendationAgent:** Synthesizes all information and provides actionable recommendations.
82
+ 5. **RAG Agent:** Retrieves external documents to augment the response context (RAG).
83
+
84
+ ## 4.2. Tasks
85
+
86
+ 1. *Crisis detection task:* Detects the crisis condition using the ‘crisis classifier tool’. This task is performed by the Crisis Detection Agent; its input is the ‘user_query’.
87
+ 2. *Mental condition classifier task:* Classifies the mental health condition from the ‘user_query’ (e.g., anxiety, depression, substance abuse) and recommends a questionnaire (for example, if the detected condition is ‘anxiety’, the ‘GAD-7’ questionnaire is recommended). Taking the questionnaire is optional for the user. This task is performed by the Mental Condition Classifier Agent.
88
+ 3. *Data retriever task:* Retrieves the user profile from the database, given the ‘user_profile_id’. This task is performed by the Data Retriever Agent, which has access to a tool for this purpose.
89
+ 4. *RAG task:* First extracts keywords from the user query, refines the query if needed, and fetches documents from the vector DB using semantic search.
90
+ 5. *Recommendation task:* Provides a comprehensive recommendation based on the user profile, the detected condition, assessment results, and the documents retrieved from the vector database. It also has a tool to fetch helplines for crisis situations and severe mental health conditions. This task is performed by the Recommendation Agent.
91
+
92
+ ## 4.3. Crews
93
+
94
+ 1. **Crisis_management_crew** (crisis detection agent): Runs crisis detection logic.
95
+ 2. **Mental_condition_crew** (mental condition classifier agent): Classifies user's mental health condition.
96
+ 3. **Data_retrieval_crew** (data retriever agent): Retrieves and formats user profile data.
97
+ 4. **Recommendation_crew** (rag agent + recommendation agent): Generates final recommendations using inputs from other agents and RAG.
98
+
99
+
100
+ ## 4.4 Tools
101
+
102
+ 1. *Bhutanese Helplines:* Provides a predefined list of mental health helplines relevant to Bhutan.
103
+
104
+ 2. *Vector Database Operations:* Retrieval from the vector database for mental health recommendations. Uses the 'sentence-transformers/all-MiniLM-L6-v2' model for embedding queries.
105
+
106
+ 3. *Data Retrieval:* Fetches user profiles from the PostgreSQL database. It currently contains some dummy user profiles. The features include: age, gender, city_region, profession, marital status, previous mental diagnosis, and ethnicity.
107
+
108
+ 4. *Crisis Classifier:* Uses the 'sentinet/suicidality' model to detect the crisis condition. Note: the lmsdmn/crisis-detection-model has been fine-tuned on a specific dataset; if that model is to be used, a new tool must be defined for it. A usage sketch follows this list.
109
+
110
+ # 5. Usage
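+ A minimal usage sketch for the crisis classifier is shown below; the label name and threshold are assumptions, since they depend on the model card:
+ 
+     from transformers import pipeline
+ 
+     crisis_classifier = pipeline("text-classification", model="sentinet/suicidality")
+ 
+     def detect_crisis(user_query):
+         """Return 'crisis' or 'no-crisis' for a user query."""
+         result = crisis_classifier(user_query)[0]
+         # The label name ("LABEL_1") and 0.5 threshold are placeholders; check the model card.
+         return "crisis" if result["label"] == "LABEL_1" and result["score"] > 0.5 else "no-crisis"
+ 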
111
+
112
+ 1. Create a Python virtual environment.
113
+
114
+ 2. Install the dependencies from requirements.txt by running the below command in your terminal:
115
+
116
+ pip install -r requirements.txt
117
+
118
+ 3. Then, run the below command in your terminal:
119
+
120
+ python chatbot.py
121
+
122
+ The console output will show the detailed steps of how agents interact, tools are used, and the final recommendations are generated for each simulated user input.
123
+
124
+ ## 5.1. LLM API Key Setup
125
+ Crucially, DrukCare AI relies on a Language Model (LLM) to function.
126
+
127
+ You need to set up your LLM provider's API key. For example, if you are using OpenAI:
128
+
129
+ 1. Obtain an API key from your chosen LLM provider (e.g., OpenAI API Keys).
130
+
131
+ 2. Set it as an environment variable:
132
+
133
+ export OPENAI_API_KEY="YOUR_API_KEY_HERE" # On macOS/Linux
134
+ Or for Windows (in Command Prompt):
135
+ set OPENAI_API_KEY="YOUR_API_KEY_HERE"
136
+ In PowerShell:
137
+ $env:OPENAI_API_KEY="YOUR_API_KEY_HERE"
138
+
139
+ Alternatively, you can hardcode it in your script (for local testing, not recommended for production):
140
+
141
+ os.environ["OPENAI_API_KEY"] = "YOUR_API_KEY_HERE"
142
+
143
+ 3. Make sure your selected LLM matches the model_name you are using.
144
+
145
+ ## 5.2 Setting up connection to Supabase
146
+
147
+ Set up the SUPABASE_DB_URI in your environment or in a .env file (see the sketch below). SUPABASE_DB_URI can be requested from Alaa or Adelia.
148
+
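+ A minimal sketch, assuming python-dotenv is listed in requirements.txt:
+ 
+     import os
+     from dotenv import load_dotenv
+ 
+     load_dotenv()  # reads SUPABASE_DB_URI (and other secrets) from the local .env file
+     db_uri = os.environ["SUPABASE_DB_URI"]
+ 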
149
+ ## 5.3 Langsmith setup for Tracing and Monitoring
150
+
151
+ Prerequisites:
152
+
153
+ 1. LangSmith Account and Project: Ensure you have a LangSmith account and a project where your LLM runs are being traced. Setting LANGSMITH_TRACING=true and LANGSMITH_PROJECT environment variables usually handles this.
154
+ 2. API Key: Have your LANGSMITH_API_KEY ready.
155
+ 3. LLM Provider API Key: Since your evaluator will be an LLM, you'll need the API key for the LLM provider you plan to use for the evaluation (e.g., OPENAI_API_KEY, ANTHROPIC_API_KEY). You'll need to add these as "Secrets" in LangSmith (Settings -> Secrets -> Add Secret).
156
+
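+ Assuming the prerequisites above are in place, the tracing environment can be configured before the crew runs (the values below are placeholders, and the project name is an assumption):
+ 
+     import os
+ 
+     os.environ["LANGSMITH_TRACING"] = "true"
+     os.environ["LANGSMITH_API_KEY"] = "YOUR_LANGSMITH_API_KEY"
+     os.environ["LANGSMITH_PROJECT"] = "drukcare-ai"
+ 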
157
+
158
+ # Disclaimer
159
+
160
+ This DrukCare AI chatbot is designed for informational and initial supportive purposes only. It is not a substitute for professional medical advice, diagnosis, or treatment. Always seek the advice of a qualified mental health professional for any questions you may have regarding a medical condition. If you are in a crisis situation, please contact the provided helplines immediately.
161
+
162
+ # License
163
+
164
+ Will be updated later.
crew_ai/__init__.py ADDED
File without changes
crew_ai/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (195 Bytes). View file
 
crew_ai/__pycache__/chatbot.cpython-311.pyc ADDED
Binary file (18.8 kB). View file
 
crew_ai/__pycache__/config.cpython-311.pyc ADDED
Binary file (1.87 kB). View file
 
crew_ai/__pycache__/data_retriever_util.cpython-311.pyc ADDED
Binary file (5.44 kB). View file