diff --git a/.gitattributes b/.gitattributes index 996e64d6554f1c98c6669ef2a764135c102555f0..c82561ab508b0b824922eaf4110db9fe0d0c9c17 100644 --- a/.gitattributes +++ b/.gitattributes @@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text static/treeman.png filter=lfs diff=lfs merge=lfs -text +knowledge/deathless.pdf filter=lfs diff=lfs merge=lfs -text +knowledge/know-see.pdf filter=lfs diff=lfs merge=lfs -text diff --git a/agents/__init__.py b/agents/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/agents/__pycache__/__init__.cpython-311.pyc b/agents/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b61cdf170b80d7b2bc39d9df3f709f87764d631 Binary files /dev/null and b/agents/__pycache__/__init__.cpython-311.pyc differ diff --git a/agents/__pycache__/__init__.cpython-312.pyc b/agents/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d16e58b095b9bef7e6853a69cc78c2bbb5c59d07 Binary files /dev/null and b/agents/__pycache__/__init__.cpython-312.pyc differ diff --git a/agents/__pycache__/agent_decision.cpython-311.pyc b/agents/__pycache__/agent_decision.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..79486981ff77b15564cd7227e1127bc1b1f3a52b Binary files /dev/null and b/agents/__pycache__/agent_decision.cpython-311.pyc differ diff --git a/agents/__pycache__/shared_rag.cpython-311.pyc b/agents/__pycache__/shared_rag.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7918e89bd37f1be925e2b36b46bbf65595b36509 Binary files /dev/null and b/agents/__pycache__/shared_rag.cpython-311.pyc differ diff --git a/agents/__pycache__/shared_rag.cpython-312.pyc b/agents/__pycache__/shared_rag.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..66c9817c4bec899294515b9970439d2c8c913ee6 Binary files /dev/null and b/agents/__pycache__/shared_rag.cpython-312.pyc differ diff --git a/agents/agent_decision.py b/agents/agent_decision.py new file mode 100644 index 0000000000000000000000000000000000000000..013b0bd4f8229afb8843829114a92b839835bc37 --- /dev/null +++ b/agents/agent_decision.py @@ -0,0 +1,336 @@ +""" +Agent Decision System for Multi-Agent Mental Health Chatbot +Orchestrates RAG, CrewAI, and Web Search agents using LangGraph. 
+""" +from dotenv import load_dotenv +import json +from typing import TypedDict, List, Any, Optional, Union, Dict +from langchain_core.messages import HumanMessage, AIMessage +from langgraph.graph import StateGraph, END +from langgraph.checkpoint.memory import MemorySaver +from difflib import SequenceMatcher + +from agents.web_search_processor_agent import WebSearchProcessorAgent +from config.config import Config +from .shared_rag import shared_rag_instance + +# Import CrewAI components +try: + from crew_ai.chatbot import ( + run_crisis_check, + run_condition_classification, + run_user_profile_retrieval, + run_recommendations + ) + CREWAI_AVAILABLE = True + print("✅ CrewAI components imported successfully") +except ImportError as e: + print(f"⚠️ CrewAI components not available: {e}") + CREWAI_AVAILABLE = False + +load_dotenv() + +# Configuration +config = Config() +memory = MemorySaver() + +# Simple query cache +QUERY_CACHE = {} +CACHE_SIMILARITY_THRESHOLD = 0.85 + +class AgentState(TypedDict): + """State maintained across the workflow.""" + messages: List[Any] + agent_name: Optional[str] + current_input: Optional[Union[str, Dict]] + output: Optional[str] + retrieval_confidence: float + user_context: Optional[Dict] + # CrewAI specific state + crisis_detected: Optional[bool] + condition_classified: Optional[str] + user_profile: Optional[Dict] + crewai_used: Optional[bool] + +def get_cached_response(query: str, user_context: dict) -> Optional[dict]: + """Check for cached similar response.""" + query_lower = query.lower() + + for cached_query, cached_data in QUERY_CACHE.items(): + similarity = SequenceMatcher(None, query_lower, cached_query.lower()).ratio() + if similarity >= CACHE_SIMILARITY_THRESHOLD: + if (cached_data['emotion'] == user_context.get('emotion') and + cached_data['status'] == user_context.get('mental_health_status')): + print(f"[CACHE] Found similar response (similarity: {similarity:.2f})") + return cached_data['response'] + return None + +def cache_response(query: str, user_context: dict, response: dict): + """Cache response for future use.""" + if len(QUERY_CACHE) > 100: + oldest_key = next(iter(QUERY_CACHE)) + del QUERY_CACHE[oldest_key] + + QUERY_CACHE[query] = { + 'emotion': user_context.get('emotion'), + 'status': user_context.get('mental_health_status'), + 'response': response + } + +def create_agent_graph(): + """Create and configure the LangGraph for agent orchestration.""" + + def rag_agent_processor(state): + """Process query using RAG pipeline""" + try: + user_query = state["current_input"] + user_context = state.get("user_context", {}) + + print(f"[RAG_AGENT] Processing: {user_query[:50]}...") + print(f"[RAG_AGENT] Context - Emotion: {user_context.get('emotion', 'neutral')}, Status: {user_context.get('mental_health_status', 'Unknown')}") + + # Check cache first + cached_response = get_cached_response(user_query, user_context) + if cached_response: + print("[RAG_AGENT] Using cached response") + return { + **state, + "output": cached_response.get('response', ''), + "agent_name": "RAG_AGENT_CACHED", + "retrieval_confidence": cached_response.get('confidence', 0.8), + "crewai_used": False + } + + # Get RAG instance and process query + rag = shared_rag_instance.get_rag() + result = rag.process_query( + query=user_query, + user_emotion=user_context.get('emotion', 'neutral'), + mental_health_status=user_context.get('mental_health_status', 'Unknown'), + user_context=user_context + ) + + confidence = result.get("confidence", 0.0) + response_text = result.get("response", 
"I'm here to help you with your mental health concerns.") + + print(f"[RAG_AGENT] Confidence: {confidence}") + print(f"[RAG_AGENT] Response: {response_text[:100]}...") + + # Cache the response + cache_response(user_query, user_context, result) + + return { + **state, + "output": response_text, + "agent_name": "RAG_AGENT", + "retrieval_confidence": confidence, + "crewai_used": False + } + + except Exception as e: + print(f"[RAG_AGENT] Error: {e}") + return { + **state, + "output": "I understand you're reaching out for support. While I'm having some technical difficulties, I want you to know that your feelings are valid and there are people who can help.", + "agent_name": "RAG_AGENT_FALLBACK", + "retrieval_confidence": 0.0, + "crewai_used": False + } + + def crewai_pipeline_processor(state: AgentState) -> AgentState: + """Process query using CrewAI pipeline when RAG confidence is low""" + print("[CREWAI_PIPELINE] Processing with CrewAI agents...") + + try: + if not CREWAI_AVAILABLE: + print("[CREWAI_PIPELINE] Not available, falling back to web search") + return fallback_to_web_search(state) + + user_query = str(state["current_input"]) + user_context = state.get("user_context", {}) + + print(f"[CREWAI_PIPELINE] Query: {user_query[:50]}...") + + # Use RAG's CrewAI integration if available + rag = shared_rag_instance.get_rag() + if hasattr(rag, 'process_query_with_crewai') and rag.crewai_enabled: + print("[CREWAI_PIPELINE] Using RAG's CrewAI integration") + + result = rag.process_query_with_crewai(user_query, user_context) + + return { + **state, + "output": result.get("response", "I'm here to support you."), + "agent_name": result.get("agent", "CREWAI_ENHANCED_SYSTEM"), + "crisis_detected": result.get("is_crisis", False), + "condition_classified": result.get("condition", "general"), + "crewai_used": True, + "retrieval_confidence": result.get("confidence", 0.85) + } + else: + print("[CREWAI_PIPELINE] Using direct CrewAI functions") + + # Direct CrewAI processing + user_id = user_context.get('user_id', 'anon_user') + + # Crisis check + crisis_result = run_crisis_check(user_query) + is_crisis = crisis_result.get("is_crisis", False) + + if is_crisis: + crisis_rec = run_recommendations( + user_query, + user_profile=json.dumps(user_context), + condition="Crisis", + answers="{}", + interpretation="N/A", + is_crisis="true" + ) + + return { + **state, + "output": crisis_rec.get("recommendation", + "🆘 Please reach out for immediate help. In Bhutan: Emergency Services (112), National Mental Health Program (1717)"), + "agent_name": "CREWAI_CRISIS_AGENT", + "crisis_detected": True, + "crewai_used": True, + "retrieval_confidence": 0.95 + } + + # Normal processing + try: + user_profile = run_user_profile_retrieval(user_query, user_id) + except: + user_profile = {"id": user_id, "name": user_context.get('name', 'User')} + + try: + condition_result = run_condition_classification(user_query, json.dumps(user_profile)) + condition = condition_result.get("condition", "general").lower() + except: + condition = "general" + + final_rec = run_recommendations( + user_query, + json.dumps(user_profile), + condition, + json.dumps(user_context.get('assessment_answers', {})), + user_context.get('mental_health_status', 'Unknown'), + is_crisis="false" + ) + + return { + **state, + "output": final_rec.get("recommendation", + f"Thank you for sharing your concerns. 
I'm here to support you with {condition} related issues."), + "agent_name": "CREWAI_ENHANCED_SYSTEM", + "condition_classified": condition, + "user_profile": user_profile, + "crewai_used": True, + "retrieval_confidence": 0.85 + } + + except Exception as e: + print(f"[CREWAI_PIPELINE] Error: {e}") + return fallback_to_web_search(state) + + def fallback_to_web_search(state: AgentState) -> AgentState: + """Fallback to web search processor""" + print("[WEB_SEARCH] Processing with web search agent...") + + try: + query = str(state["current_input"]) + user_context = state.get("user_context", {}) + + # Use WebSearchProcessorAgent + web_agent = WebSearchProcessorAgent() + response = web_agent.process_web_search_results( + query=query, + user_context=user_context + ) + + return { + **state, + "output": response, + "agent_name": "WEB_SEARCH_PROCESSOR_AGENT", + "crewai_used": False + } + + except Exception as e: + print(f"[WEB_SEARCH] Error: {e}") + return { + **state, + "output": "I'm here to support you, though I'm having some technical difficulties. Please know that help is available. For immediate support in Bhutan, contact the National Mental Health Program at 1717.", + "agent_name": "WEB_SEARCH_FALLBACK", + "crewai_used": False + } + + def confidence_based_routing(state: AgentState) -> str: + """Route based on RAG confidence score.""" + min_confidence = getattr(config.rag, 'min_retrieval_confidence', 0.7) + confidence = state.get("retrieval_confidence", 0.0) + + print(f"[ROUTING] Confidence: {confidence:.2f}, Threshold: {min_confidence}") + + if confidence < min_confidence: + if CREWAI_AVAILABLE: + print(f"[ROUTING] Low confidence, routing to CrewAI...") + return "CREWAI_PIPELINE" + else: + print(f"[ROUTING] Low confidence, routing to Web Search...") + return "WEB_SEARCH_PROCESSOR_AGENT" + + print(f"[ROUTING] High confidence, finalizing...") + return "finalize_response" + + def finalize_response(state: AgentState) -> AgentState: + """Finalize the response.""" + output = state.get("output", "") + + if output: + messages = state.get("messages", []) + messages.append(AIMessage(content=str(output))) + + agent_name = state.get("agent_name", "Unknown") + crewai_used = state.get("crewai_used", False) + + print(f"[FINALIZE] Response from {agent_name}, CrewAI: {crewai_used}") + + return { + **state, + "messages": messages + } + + return state + + # Create workflow graph + workflow = StateGraph(AgentState) + + # Add nodes + workflow.add_node("RAG_AGENT", rag_agent_processor) + workflow.add_node("CREWAI_PIPELINE", crewai_pipeline_processor) + workflow.add_node("WEB_SEARCH_PROCESSOR_AGENT", fallback_to_web_search) + workflow.add_node("finalize_response", finalize_response) + + # Define edges + workflow.set_entry_point("RAG_AGENT") + workflow.add_conditional_edges("RAG_AGENT", confidence_based_routing) + workflow.add_edge("CREWAI_PIPELINE", "finalize_response") + workflow.add_edge("WEB_SEARCH_PROCESSOR_AGENT", "finalize_response") + workflow.add_edge("finalize_response", END) + + return workflow.compile(checkpointer=memory) + +def init_agent_state() -> AgentState: + """Initialize agent state with default values.""" + return { + "messages": [], + "agent_name": None, + "current_input": None, + "output": None, + "retrieval_confidence": 0.0, + "user_context": None, + "crisis_detected": None, + "condition_classified": None, + "user_profile": None, + "crewai_used": None + } \ No newline at end of file diff --git a/agents/rag_agent/UNUSED_evaluation.py b/agents/rag_agent/UNUSED_evaluation.py new file mode 100644 
index 0000000000000000000000000000000000000000..025299e553025b35a45d4755a5ff518795ee0263 --- /dev/null +++ b/agents/rag_agent/UNUSED_evaluation.py @@ -0,0 +1,177 @@ +import logging +from typing import List, Dict, Any, Optional +import re +import json +from collections import Counter + +class RAGEvaluator: + """ + Evaluates the performance of the RAG system and tracks metrics. + """ + def __init__(self, config): + """ + Initialize the evaluator. + Args: + config: Configuration object + """ + self.logger = logging.getLogger(__name__) + self.metrics = { + "queries_processed": 0, + "successful_retrievals": 0, + "failed_retrievals": 0, + "avg_retrieval_time": 0, + "avg_response_time": 0, + "avg_confidence_score": 0, + "feedback_scores": [] + } + self.save_path = getattr(config.rag, "metrics_save_path", "rag_metrics.json") + + def log_retrieval(self, query: str, retrieved_docs: List[Dict[str, Any]], + retrieval_time: float, success: bool = True): + """ + Log metrics for a retrieval operation. + """ + self.metrics["queries_processed"] += 1 + + if success and retrieved_docs: + self.metrics["successful_retrievals"] += 1 + prev_avg = self.metrics["avg_retrieval_time"] + prev_count = self.metrics["queries_processed"] - 1 + self.metrics["avg_retrieval_time"] = (prev_avg * prev_count + retrieval_time) / self.metrics["queries_processed"] + scores = [doc.get("score", 0) for doc in retrieved_docs] + avg_score = sum(scores) / len(scores) if scores else 0 + self.logger.info(f"Query: '{query}' | Docs: {len(retrieved_docs)} | Avg Score: {avg_score:.4f}") + else: + self.metrics["failed_retrievals"] += 1 + self.logger.warning(f"Failed retrieval for query: '{query}'") + + def log_response(self, query: str, response: Dict[str, Any], response_time: float): + """ + Log metrics for a response generation operation. + """ + prev_avg = self.metrics["avg_response_time"] + prev_count = self.metrics["queries_processed"] - 1 + self.metrics["avg_response_time"] = (prev_avg * prev_count + response_time) / self.metrics["queries_processed"] + + confidence = response.get("confidence", 0) + prev_avg = self.metrics["avg_confidence_score"] + self.metrics["avg_confidence_score"] = (prev_avg * prev_count + confidence) / self.metrics["queries_processed"] + + self.logger.info(f"Generated response for query: '{query}' | Confidence: {confidence:.4f}") + + def log_user_feedback(self, query: str, response: Dict[str, Any], feedback_score: int): + """ + Log user feedback on responses. + """ + self.metrics["feedback_scores"].append({ + "query": query, + "response": response.get("response", ""), + "score": feedback_score + }) + self.logger.info(f"Received feedback for query: '{query}' | Score: {feedback_score}") + + def evaluate_response_quality(self, query: str, retrieved_docs: List[Dict[str, Any]], + response: Dict[str, Any]) -> Dict[str, Any]: + """ + Evaluate the quality of the response based on retrieved documents. + """ + retrieval_precision = self._calculate_precision(query, retrieved_docs) + answer_relevance = self._calculate_relevance(query, response, retrieved_docs) + metrics = { + "retrieval_precision": retrieval_precision, + "answer_relevance": answer_relevance, + "hallucination_risk": self._estimate_hallucination_risk(response, retrieved_docs), + "answer_completeness": self._calculate_completeness(response, retrieved_docs) + } + return metrics + + def _calculate_precision(self, query: str, docs: List[Dict[str, Any]]) -> float: + """ + Calculate precision of retrieved documents (simplified). 
+ """ + if not docs: + return 0.0 + scores = [doc.get("score", 0) for doc in docs] + return sum(scores) / len(scores) if scores else 0.0 + + def _calculate_relevance(self, query: str, response: Dict[str, Any], docs: List[Dict[str, Any]]) -> float: + """ + Calculate relevance of the response to the query. + """ + if not docs or not response: + return 0.0 + response_text = response.get("response", "").lower() + query_words = set(query.lower().split()) + stopwords = {"a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for", "with", "by"} + query_words = query_words - stopwords + word_count = sum(1 for word in query_words if word in response_text) + return word_count / len(query_words) if query_words else 0.0 + + def _estimate_hallucination_risk(self, response: Dict[str, Any], docs: List[Dict[str, Any]]) -> float: + """ + Estimate risk of hallucination in the response. + """ + if not docs or not response: + return 1.0 + all_doc_content = " ".join([doc["content"].lower() for doc in docs]) + response_text = response.get("response", "").lower() + factual_statements = re.findall(r'[^.!?]*[.!?]', response_text) + unsupported = 0 + total = len(factual_statements) + for statement in factual_statements: + has_number = bool(re.search(r'\d+', statement)) + has_medical_term = bool(re.search(r'(?i)(disease|syndrome|treatment|medication|therapy|drug|dosage|diagnosis)', statement)) + if (has_number or has_medical_term) and not self._is_supported(statement, all_doc_content): + unsupported += 1 + return unsupported / total if total > 0 else 0.5 + + def _is_supported(self, statement: str, doc_content: str) -> bool: + """ + Check if a statement is supported by document content. + """ + keywords = statement.lower().split() + keywords = [w for w in keywords if len(w) > 4] + if not keywords: + return True + found = sum(1 for word in keywords if word in doc_content) + return (found / len(keywords)) >= 0.6 + + def _calculate_completeness(self, response: Dict[str, Any], docs: List[Dict[str, Any]]) -> float: + """ + Calculate completeness of the response. + """ + response_text = response.get("response", "") + word_count = len(response_text.split()) + length_score = min(word_count / 150, 1.0) + has_introduction = bool(re.search(r'^[A-Z][^.!?]{10,}[.!?]', response_text)) + has_conclusion = bool(re.search(r'(?i)(in conclusion|to summarize|overall|in summary)', response_text)) + structure_score = (has_introduction + has_conclusion) / 2 + return (length_score * 0.7) + (structure_score * 0.3) + + def save_metrics(self): + """Save current metrics to disk.""" + try: + with open(self.save_path, 'w') as f: + json.dump(self.metrics, f, indent=2) + self.logger.info(f"Metrics saved to {self.save_path}") + except Exception as e: + self.logger.error(f"Failed to save metrics: {e}") + + def get_metrics_summary(self) -> Dict[str, Any]: + """ + Get a summary of current metrics. 
+ """ + retrieval_success_rate = 0 + if self.metrics["queries_processed"] > 0: + retrieval_success_rate = self.metrics["successful_retrievals"] / self.metrics["queries_processed"] + feedback_distribution = Counter(item["score"] for item in self.metrics["feedback_scores"]) + avg_feedback = sum(item["score"] for item in self.metrics["feedback_scores"]) / len(self.metrics["feedback_scores"]) if self.metrics["feedback_scores"] else 0 + return { + "queries_processed": self.metrics["queries_processed"], + "retrieval_success_rate": retrieval_success_rate, + "avg_retrieval_time": self.metrics["avg_retrieval_time"], + "avg_response_time": self.metrics["avg_response_time"], + "avg_confidence_score": self.metrics["avg_confidence_score"], + "feedback_distribution": feedback_distribution, + "avg_feedback_score": avg_feedback + } \ No newline at end of file diff --git a/agents/rag_agent/__init__.py b/agents/rag_agent/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..10f9caa95409d682345aec6ef7b195b4c760516e --- /dev/null +++ b/agents/rag_agent/__init__.py @@ -0,0 +1,340 @@ +from pathlib import Path +from typing import Dict, Any, List, Optional +from sentence_transformers import SentenceTransformer +import logging +from config.config import Config + +from .vector_store import QdrantRetriever +from .document_processor import MedicalDocumentProcessor +from .query_processor import QueryProcessor +from .reranker import Reranker +from .response_generator import ResponseGenerator +from .data_ingestion import MedicalDataIngestion +import json + +from dotenv import load_dotenv +load_dotenv() + + +class MedicalRAG: + """ + Medical Retrieval-Augmented Generation system that integrates all components. + """ + def __init__(self, config: Config, llm, embedding_model): + self.logger = logging.getLogger(__name__) + self.logger.setLevel(logging.INFO) + + self.config = config + self.llm = llm + self.embedding_model = embedding_model or SentenceTransformer("all-MiniLM-L6-v2") + + if not self.embedding_model: + raise ValueError("Embedding model is required for document processing") + + # Get chunking parameters from config + self.chunk_size = getattr(config.rag, 'chunk_size', 256) + self.chunk_overlap = getattr(config.rag, 'chunk_overlap', 50) + self.chunking_strategy = getattr(config.rag, 'chunking_strategy', 'hybrid') + + # Ensure processed docs directory exists + self.processed_docs_dir = Path(getattr(config.rag, 'processed_docs_dir', 'processed_docs')) + self.processed_docs_dir.mkdir(exist_ok=True) + + # Initialize CrewAI integration + self.crewai_enabled = True + try: + self._initialize_crewai_agents() + self.logger.info("✅ CrewAI agents initialized successfully") + except Exception as e: + self.logger.warning(f"⚠️ CrewAI agents not available: {e}") + self.crewai_enabled = False + + # Initialize core components + try: + self.retriever = QdrantRetriever(config) + self.document_processor = MedicalDocumentProcessor(config, self.embedding_model) + self.query_processor = QueryProcessor(config, self.embedding_model) + self.reranker = Reranker(config) + self.response_generator = ResponseGenerator(config, llm) + self.data_ingestion = MedicalDataIngestion() + + self.logger.info(f"✅ MedicalRAG initialized - Embedding dim: {getattr(config.rag, 'embedding_dim', 'unknown')}") + + except Exception as e: + self.logger.error(f"❌ Error initializing MedicalRAG components: {e}") + raise + + def _initialize_crewai_agents(self): + """Initialize CrewAI agents from crew_ai module""" + from crew_ai.chatbot 
import ( + run_crisis_check, + run_condition_classification, + run_user_profile_retrieval, + run_recommendations + ) + + # Store CrewAI functions + self.run_crisis_check = run_crisis_check + self.run_condition_classification = run_condition_classification + self.run_user_profile_retrieval = run_user_profile_retrieval + self.run_recommendations = run_recommendations + + def ingest_knowledge_folder(self, folder_path: str) -> Dict[str, Any]: + """ + Ingest all documents from a knowledge folder. + """ + folder = Path(folder_path) + if not folder.exists(): + self.logger.error(f"Knowledge folder not found: {folder_path}") + return {"error": f"Folder not found: {folder_path}"} + + ingestion_results = { + "total_files": 0, + "successful": 0, + "failed": 0, + "files": [], + "total_chunks": 0 + } + + # Get all supported files + supported_extensions = ['.txt', '.pdf', '.md', '.json', '.csv'] + files = [] + for ext in supported_extensions: + files.extend(folder.glob(f'*{ext}')) + + ingestion_results["total_files"] = len(files) + + if not files: + self.logger.warning(f"No supported files found in {folder_path}") + return ingestion_results + + self.logger.info(f"Found {len(files)} files to ingest") + + for file_path in files: + try: + self.logger.info(f"Ingesting {file_path.name}...") + + # Load and process document + documents = self.data_ingestion.load_document(str(file_path)) + if not documents: + self.logger.warning(f"No content extracted from {file_path.name}") + ingestion_results["failed"] += 1 + continue + + # Create chunks + chunks = self.document_processor.process_documents(documents) + self.logger.info(f"Created {len(chunks)} chunks from {file_path.name}") + + # Store in vector database + stored_count = self.retriever.add_documents(chunks) + + ingestion_results["successful"] += 1 + ingestion_results["total_chunks"] += len(chunks) + ingestion_results["files"].append({ + "name": file_path.name, + "status": "success", + "chunks": len(chunks), + "stored": stored_count + }) + + except Exception as e: + self.logger.error(f"Error ingesting {file_path.name}: {str(e)}") + ingestion_results["failed"] += 1 + ingestion_results["files"].append({ + "name": file_path.name, + "status": "error", + "error": str(e) + }) + + self.logger.info(f"Ingestion complete: {ingestion_results['successful']}/{ingestion_results['total_files']} files processed") + return ingestion_results + + def process_query(self, query: str, user_emotion: Optional[str] = None, + mental_health_status: Optional[str] = None, + user_context: Optional[Dict] = None, **kwargs) -> Dict[str, Any]: + """Process a query using the RAG pipeline with emotion and health status awareness.""" + try: + # Extract and validate parameters + user_emotion = user_emotion or 'neutral' + mental_health_status = mental_health_status or 'Unknown' + user_context = user_context or {} + + self.logger.info(f"[RAG] Processing query: {query[:50]}...") + self.logger.info(f"[RAG] User emotion: {user_emotion}, Status: {mental_health_status}") + + # Process query metadata (NOT recursive call) + query_metadata = self.query_processor.process_query(query) + enhanced_query = query_metadata.get('expanded_query', query) + + print(f"[RAG] Enhanced query: {enhanced_query}") + + # Generate embedding and retrieve documents + query_embedding = self.embedding_model.encode(enhanced_query) + retrieved_docs = self.retriever.retrieve( + query_embedding, + top_k=getattr(self.config.rag, 'top_k', 5), + metadata_filter=query_metadata.get('filters', {}) + ) + + print(f"[RAG] Retrieved 
{len(retrieved_docs)} documents") + + # Debug first document + if retrieved_docs: + doc = retrieved_docs[0] + if hasattr(doc, 'page_content'): + print(f"[RAG] Top doc: {doc.page_content[:100]}...") + else: + print(f"[RAG] Top doc: {str(doc)[:100]}...") + else: + print("[RAG] ⚠️ No documents retrieved!") + + # Rerank if enabled + if self.reranker and retrieved_docs: + print("[RAG] Reranking documents...") + reranked_docs = self.reranker.rerank(query, retrieved_docs) + else: + reranked_docs = retrieved_docs + + # Generate response + response_data = self.response_generator.generate_response( + query=query, + retrieved_docs=reranked_docs, + user_emotion=user_emotion, + mental_health_status=mental_health_status, + user_context=user_context + ) + + # Calculate final confidence + confidence = response_data.get("confidence", 0.5) + + # Boost confidence for personal emotional queries + if user_emotion not in ['neutral', 'neutral/unsure'] and any( + phrase in query.lower() for phrase in ["i am", "i feel", "i'm", "my", "me"] + ): + confidence_boost = 0.1 + confidence = min(confidence + confidence_boost, 1.0) + self.logger.info(f"[RAG] Boosted confidence by {confidence_boost} for personal query") + + response_data["confidence"] = confidence + + print(f"[RAG] Final confidence: {confidence}") + print(f"[RAG] Response: {response_data.get('response', '')[:100]}...") + + return response_data + + except Exception as e: + self.logger.error(f"[RAG] Error processing query: {e}") + import traceback + traceback.print_exc() + return self._generate_error_response(str(e)) + + def process_query_with_crewai(self, query: str, user_context: Optional[Dict] = None) -> Dict[str, Any]: + """Enhanced query processing using CrewAI agents""" + user_context = user_context or {} # callers may pass None + try: + if not self.crewai_enabled: + self.logger.info("[CrewAI] Not enabled, falling back to regular RAG") + return self.process_query( + query, + user_emotion=user_context.get('emotion', 'neutral'), + mental_health_status=user_context.get('mental_health_status', 'Unknown'), + user_context=user_context + ) + + self.logger.info(f"[CrewAI] Processing with agents: {query[:50]}...") + + # Step 1: Crisis Detection + crisis_result = self.run_crisis_check(query) + is_crisis = crisis_result.get("is_crisis", False) + + if is_crisis: + self.logger.warning("[CrewAI] 🚨 Crisis detected") + crisis_rec = self.run_recommendations( + query, + user_profile=json.dumps(user_context), + condition="Crisis", + answers="{}", + interpretation="N/A", + is_crisis="true" + ) + return { + "response": crisis_rec.get("recommendation", + "🆘 Please contact emergency services immediately: 112 or National Mental Health Program: 1717"), + "confidence": 0.95, + "method": "crewai_crisis", + "agent": "Crisis Detection Agent", + "is_crisis": True, + "condition": "crisis", + "sources": [] + } + + # Step 2: Get user profile + user_id = user_context.get('user_id', 'anon_user') + try: + user_profile = self.run_user_profile_retrieval(query, user_id) + except Exception: # don't swallow KeyboardInterrupt/SystemExit + user_profile = { + "id": user_id, + "name": user_context.get('name', 'User'), + "preferences": "General mental health support" + } + + # Step 3: Classify condition + try: + condition_result = self.run_condition_classification(query, json.dumps(user_profile)) + condition = condition_result.get("condition", "general").lower() + except Exception: + condition = "general" + + self.logger.info(f"[CrewAI] Classified condition: {condition}") + + # Step 4: Get RAG results for context + rag_result = self.process_query( + query, + user_emotion=user_context.get('emotion', 'neutral'), + 
mental_health_status=user_context.get('mental_health_status', 'Unknown'), + user_context=user_context + ) + + # Step 5: Generate final recommendation + final_rec = self.run_recommendations( + query, + json.dumps(user_profile), + condition, + json.dumps(user_context.get('assessment_answers', {})), + user_context.get('mental_health_status', 'Unknown'), + is_crisis="false" + ) + + # Combine RAG and CrewAI insights + combined_response = final_rec.get("recommendation", rag_result.get("response", "")) + + return { + "response": combined_response, + "confidence": max(rag_result.get("confidence", 0.5), 0.85), + "method": "crewai_enhanced", + "agent": "CrewAI Enhanced System", + "sources": rag_result.get("sources", []), + "condition": condition, + "is_crisis": False, + "rag_confidence": rag_result.get("confidence", 0.5) + } + + except Exception as e: + self.logger.error(f"[CrewAI] Processing failed: {e}") + # Fallback to regular RAG + return self.process_query( + query, + user_emotion=user_context.get('emotion', 'neutral'), + mental_health_status=user_context.get('mental_health_status', 'Unknown'), + user_context=user_context + ) + + def _generate_error_response(self, error_message: str) -> Dict[str, Any]: + """Generate a fallback error response when RAG processing fails.""" + return { + "response": "I apologize, but I'm experiencing some technical difficulties. For immediate mental health support in Bhutan, please contact:\n\n• National Mental Health Program: 1717 (24/7)\n• Emergency Services: 112\n\nYour mental health matters, and help is available.", + "confidence": 0.0, + "sources": [], + "method": "error_fallback", + "error": error_message + } \ No newline at end of file diff --git a/agents/rag_agent/__pycache__/__init__.cpython-311.pyc b/agents/rag_agent/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..423b6fe9848522e739eb6b14bce81251e644ddf6 Binary files /dev/null and b/agents/rag_agent/__pycache__/__init__.cpython-311.pyc differ diff --git a/agents/rag_agent/__pycache__/data_ingestion.cpython-311.pyc b/agents/rag_agent/__pycache__/data_ingestion.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1cb6cecd19a25b6eb8fc4f41b3cdb399a8a96ee3 Binary files /dev/null and b/agents/rag_agent/__pycache__/data_ingestion.cpython-311.pyc differ diff --git a/agents/rag_agent/__pycache__/document_processor.cpython-311.pyc b/agents/rag_agent/__pycache__/document_processor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c57777f4a887905465788e7e2618665b7988b298 Binary files /dev/null and b/agents/rag_agent/__pycache__/document_processor.cpython-311.pyc differ diff --git a/agents/rag_agent/__pycache__/query_processor.cpython-311.pyc b/agents/rag_agent/__pycache__/query_processor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..65f0678d1b5cefac47b176b0e8614b12f947ca22 Binary files /dev/null and b/agents/rag_agent/__pycache__/query_processor.cpython-311.pyc differ diff --git a/agents/rag_agent/__pycache__/reranker.cpython-311.pyc b/agents/rag_agent/__pycache__/reranker.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf7c59686d8b2bd8e8cdf03228965021aa34738c Binary files /dev/null and b/agents/rag_agent/__pycache__/reranker.cpython-311.pyc differ diff --git a/agents/rag_agent/__pycache__/response_generator.cpython-311.pyc b/agents/rag_agent/__pycache__/response_generator.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..91ccd48b5855131e1b488b7e9f1860cf8b3eb271 Binary files /dev/null and b/agents/rag_agent/__pycache__/response_generator.cpython-311.pyc differ diff --git a/agents/rag_agent/__pycache__/vector_store.cpython-311.pyc b/agents/rag_agent/__pycache__/vector_store.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..015f725df8141b83cb205d68251cff8701a19507 Binary files /dev/null and b/agents/rag_agent/__pycache__/vector_store.cpython-311.pyc differ diff --git a/agents/rag_agent/data_ingestion.py b/agents/rag_agent/data_ingestion.py new file mode 100644 index 0000000000000000000000000000000000000000..4ba5db1924c5eed501b74276ef2c444c1dfe7287 --- /dev/null +++ b/agents/rag_agent/data_ingestion.py @@ -0,0 +1,300 @@ +import os +import json +import logging +from pathlib import Path +import pandas as pd +from typing import List, Dict, Any, Optional, Union +from PyPDF2 import PdfReader + +from langchain_community.document_loaders import ( + TextLoader, + PyPDFLoader, + CSVLoader, + JSONLoader, + UnstructuredMarkdownLoader +) +from langchain_core.documents import Document + +# Set up logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + + +class MedicalDataIngestion: + """ + Handles loading of various medical document formats. + """ + def __init__(self, config_path: Optional[str] = None): + """ + Initialize the data ingestion pipeline. + + Args: + config_path: Optional path to configuration file + """ + self.logger = logging.getLogger(__name__) + + # Initialize loaders mapping + self.loaders = { + '.txt': self._load_text, + '.pdf': self._load_pdf, + '.csv': self._load_csv, + '.json': self._load_json, + '.md': self._load_markdown + } + + # Initialize stats + self.stats = { + "files_processed": 0, + "documents_ingested": 0, + "errors": 0 + } + + logger.info("MedicalDataIngestion initialized") + + def load_document(self, file_path: str) -> List[Document]: + """ + Load a document based on its file type. 
+ + Args: + file_path: Path to the document file + + Returns: + List of Document objects + """ + path = Path(file_path) + if not path.exists(): + self.logger.error(f"File not found: {file_path}") + return [] + + suffix = path.suffix.lower() + if suffix not in self.loaders: + self.logger.warning(f"Unsupported file type: {suffix}") + return [] + + try: + return self.loaders[suffix](file_path) + except Exception as e: + self.logger.error(f"Error loading {file_path}: {str(e)}") + return [] + + def _load_text(self, file_path: str) -> List[Document]: + """Load text files.""" + try: + loader = TextLoader(file_path, encoding='utf-8') + return loader.load() + except Exception as e: + # Fallback to manual loading + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + doc = Document( + page_content=content, + metadata={"source": file_path, "type": "text"} + ) + return [doc] + except Exception as e2: + self.logger.error(f"Error loading text file: {e2}") + return [] + + def _load_pdf(self, file_path: str) -> List[Document]: + """Load PDF files.""" + try: + loader = PyPDFLoader(file_path) + return loader.load() + except Exception as e: + # Fallback to PyPDF2 + try: + reader = PdfReader(file_path) + documents = [] + + for page_num, page in enumerate(reader.pages): + page_text = page.extract_text() + if page_text: + doc = Document( + page_content=page_text, + metadata={ + "source": file_path, + "type": "pdf", + "page": page_num + 1 + } + ) + documents.append(doc) + + return documents + except Exception as e2: + self.logger.error(f"Error loading PDF file: {e2}") + return [] + + def _load_csv(self, file_path: str) -> List[Document]: + """Load CSV files.""" + try: + loader = CSVLoader(file_path) + return loader.load() + except Exception as e: + # Fallback to pandas + try: + df = pd.read_csv(file_path) + documents = [] + + # Find the column with the most text content + text_column = self._identify_content_column(df) + + for _, row in df.iterrows(): + content = str(row[text_column]) + metadata = { + "source": file_path, + "type": "csv" + } + + # Add other columns as metadata + for col in df.columns: + if col != text_column and not pd.isna(row[col]): + metadata[col] = str(row[col]) + + doc = Document( + page_content=content, + metadata=metadata + ) + documents.append(doc) + + return documents + except Exception as e2: + self.logger.error(f"Error loading CSV file: {e2}") + return [] + + def _load_json(self, file_path: str) -> List[Document]: + """Load JSON files.""" + try: + # For JSON files, we'll create documents from each entry + with open(file_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + documents = [] + if isinstance(data, list): + for idx, item in enumerate(data): + content = json.dumps(item, indent=2) + doc = Document( + page_content=content, + metadata={ + "source": file_path, + "type": "json", + "index": idx + } + ) + documents.append(doc) + else: + content = json.dumps(data, indent=2) + doc = Document( + page_content=content, + metadata={"source": file_path, "type": "json"} + ) + documents.append(doc) + + return documents + except Exception as e: + self.logger.error(f"Error loading JSON file: {e}") + return [] + + def _load_markdown(self, file_path: str) -> List[Document]: + """Load Markdown files.""" + try: + loader = UnstructuredMarkdownLoader(file_path) + return loader.load() + except Exception as e: + # Fallback to simple text loading + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + doc = Document( + page_content=content, + 
metadata={"source": file_path, "type": "markdown"} + ) + return [doc] + except Exception as e2: + self.logger.error(f"Error loading Markdown file: {e2}") + return [] + + def load_directory(self, directory_path: str) -> List[Document]: + """ + Load all supported documents from a directory. + + Args: + directory_path: Path to the directory + + Returns: + List of all loaded documents + """ + dir_path = Path(directory_path) + if not dir_path.exists(): + self.logger.error(f"Directory not found: {directory_path}") + return [] + + all_documents = [] + for file_path in dir_path.iterdir(): + if file_path.is_file() and file_path.suffix.lower() in self.loaders: + self.logger.info(f"Loading {file_path.name}") + documents = self.load_document(str(file_path)) + all_documents.extend(documents) + + return all_documents + + def _identify_content_column(self, df: pd.DataFrame) -> str: + """ + Identify which column in a DataFrame contains the main content. + + Args: + df: Pandas DataFrame + + Returns: + Name of the content column + """ + # Look for columns with these names + content_column_names = ["content", "text", "description", "abstract", "body"] + + for name in content_column_names: + if name in df.columns: + return name + + # If no standard content column found, look for the column with longest strings + avg_lengths = {} + for col in df.columns: + if df[col].dtype == 'object': # Only check string columns + # Calculate average string length + avg_length = df[col].astype(str).apply(len).mean() + avg_lengths[col] = avg_length + + if avg_lengths: + # Return column with longest average string length + return max(avg_lengths.items(), key=lambda x: x[1])[0] + + # Fallback to first column + return df.columns[0] + + def _identify_json_content_field(self, item: Dict) -> Optional[str]: + """ + Identify which field in a JSON object contains the main content. + + Args: + item: Dictionary representing a JSON object + + Returns: + Name of the content field or None if not found + """ + # Look for fields with these names + content_field_names = ["content", "text", "description", "abstract", "body"] + + for name in content_field_names: + if name in item and isinstance(item[name], str): + return name + + # If no standard content field found, look for the field with longest string + text_fields = {} + for key, value in item.items(): + if isinstance(value, str) and len(value) > 50: + text_fields[key] = len(value) + + if text_fields: + # Return field with longest text + return max(text_fields.items(), key=lambda x: x[1])[0] + + return None \ No newline at end of file diff --git a/agents/rag_agent/document_processor.py b/agents/rag_agent/document_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..a1cbea4633fcbcc22e29e553c78c8d9d6ec6bb1d --- /dev/null +++ b/agents/rag_agent/document_processor.py @@ -0,0 +1,507 @@ +import re +import uuid +import logging +from typing import List, Dict, Any, Optional, Tuple +from pathlib import Path +import hashlib +from datetime import datetime +import nltk +from nltk.tokenize import sent_tokenize +import numpy as np +from langchain.schema import Document # Add this import for Document + +class MedicalDocumentProcessor: + """ + Processes ingested medical/mental health documents: chunking, embedding, and metadata enrichment. 
+ """ + def __init__(self, config, embedding_model): + self.logger = logging.getLogger(__name__) + required_attrs = ["chunk_size", "chunk_overlap", "processed_docs_dir"] + for attr in required_attrs: + if not hasattr(config.rag, attr): + raise ValueError(f"Missing '{attr}' in config.rag. Please add it to config/rag.yaml.") + + self.embedding_model = embedding_model + self.chunk_size = config.rag.chunk_size + self.chunk_overlap = config.rag.chunk_overlap + self.processed_docs_dir = Path(config.rag.processed_docs_dir) + self.processed_docs_dir.mkdir(parents=True, exist_ok=True) + self.chunking_strategy = getattr(config.rag, "chunking_strategy", "hybrid") + self.logger.info(f"Using chunking strategy: {self.chunking_strategy}") + + # Add mental health-specific patterns + self.document_type_patterns = { + "mental_health_tip": re.compile(r"(?i)(stress|anxiety|depression|coping|therapy|mindfulness|prevention|tip|advice|support)"), + "clinical_note": re.compile(r"(?i)(chief complaint|history of present illness|hpi|past medical history|pmh|medications|assessment|plan|review of systems|ros|physical examination|lab results|imaging|impression|followup)"), + "patient_record": re.compile(r"(?i)(patient information|demographics|vital signs|allergies|immunizations|family history|social history|surgical history|problem list)"), + "treatment_guidelines": re.compile(r"(?i)(recommendations|guidelines|protocols|indications|contraindications|dosage|administration|monitoring|special populations)"), + "pharmacology": re.compile(r"(?i)(mechanism of action|pharmacokinetics|pharmacodynamics|dosing|adverse effects|warnings|interactions|storage|pregnancy considerations)"), + "general_medical": re.compile(r"(?i)(medical|health|wellness|nutrition|exercise|lifestyle|prevention|diagnosis|treatment|symptom|condition)")} + + self.section_headers = [ + r"^(stress|anxiety|depression|coping|therapy|mindfulness|tip|advice|support)", + r"^(chief complaint|history of present illness|hpi|past medical history|pmh|medications|assessment|plan|review of systems|ros|physical examination|lab results|imaging|impression|followup)", + r"^(patient information|demographics|vital signs|allergies|immunizations|family history|social history|surgical history|problem list)", + r"^(recommendations|guidelines|protocols|indications|contraindications|dosage|administration|monitoring|special populations)", + r"^(mechanism of action|pharmacokinetics|pharmacodynamics|dosing|adverse effects|warnings|interactions|storage|pregnancy considerations)", + r"^(medical|health|wellness|nutrition|exercise|lifestyle|prevention|diagnosis|treatment|symptom|condition)"] + filtered_headers = [header for header in self.section_headers if header.strip()] + self.section_pattern = re.compile(f"({'|'.join(filtered_headers)})", re.IGNORECASE) + + # Mental health entity patterns + self.medical_entity_categories = { + "mental_health": r"(stress|anxiety|depression|suicide|coping|therapy|counseling|mindfulness|panic|self[- ]harm|hopelessness|support group|resilience|burnout|well-being|mental health)", + "symptom": r"(headache|fatigue|insomnia|mood swings|irritability|concentration issues|memory problems|appetite changes|sleep disturbances|social withdrawal)", + "treatment": r"(medication|therapy|cognitive behavioral therapy|CBT|dialectical behavior therapy|DBT|exposure therapy|medication management|psychiatric evaluation|support group|mindfulness training)", + "diagnosis": r"(bipolar disorder|schizophrenia|post-traumatic stress disorder|PTSD|obsessive[- ]compulsive 
disorder|OCD|generalized anxiety disorder|GAD|major depressive disorder|MDD|panic disorder|social anxiety disorder|SAD)", + "risk_factor": r"(genetic predisposition|family history|trauma|substance abuse|chronic illness|stressful life events|social isolation|poor coping skills|low resilience|lack of support)", + "intervention": r"(cognitive restructuring|exposure therapy|mindfulness meditation|relaxation techniques|stress management|problem-solving skills|assertiveness training|social skills training|self-care strategies|crisis intervention)"} + all_patterns = [f"(?P<{cat}>{pat})" for cat, pat in self.medical_entity_categories.items()] + self.medical_entity_pattern = re.compile("|".join(all_patterns), re.IGNORECASE) + + + + def process_documents(self, documents: List[Document]) -> List[Document]: + """Process and chunk documents.""" + all_chunks = [] + + for doc in documents: + # Get content and metadata + content = doc.page_content if hasattr(doc, 'page_content') else str(doc) + metadata = doc.metadata if hasattr(doc, 'metadata') else {} + + # Create chunks based on strategy + if self.chunking_strategy == "hybrid": + chunks = self._hybrid_chunking(content, metadata) + elif self.chunking_strategy == "semantic": + chunks = self._semantic_chunking(content, metadata) + else: + chunks = self._fixed_chunking(content, metadata) + + # Add embeddings to chunks + for chunk in chunks: + if not chunk.metadata.get('embedding'): + embedding = self.embedding_model.encode(chunk.page_content) + chunk.metadata['embedding'] = embedding.tolist() + + all_chunks.extend(chunks) + + self.logger.info(f"Processed {len(all_chunks)} chunks from {len(documents)} documents") + return all_chunks + + def _detect_document_type(self, text: str) -> str: + """ + Detect the type of medical document based on content patterns. + + Args: + text: Document text + + Returns: + Document type string + """ + type_scores = {} + + # Check each document type pattern + for doc_type, pattern in self.document_type_patterns.items(): + matches = pattern.findall(text) + type_scores[doc_type] = len(matches) + + # Find the document type with the highest number of matches + if max(type_scores.values(), default=0) > 0: + return max(type_scores.items(), key=lambda x: x[1])[0] + + # Default to general if no clear type + return "general_medical" + + + def _split_into_paragraphs(self, text: str, section_name: str) -> List[Tuple[str, str, str]]: + """ + Split text into paragraph-level chunks. 
+ + Args: + text: Text to split + section_name: Name of the section + + Returns: + List of (chunk_text, section_name, level) tuples + """ + paragraphs = re.split(r'\n\s*\n', text) + chunks = [] + + for i, para in enumerate(paragraphs): + if not para.strip(): + continue + + # Check if paragraph is too large + if len(para.split()) > self.chunk_size: + # Further split into sentences + sentences = sent_tokenize(para) + current_chunk = [] + current_length = 0 + + for sentence in sentences: + sentence_length = len(sentence.split()) + + if current_length + sentence_length > self.chunk_size and current_chunk: + # Add current chunk + chunk_text = " ".join(current_chunk) + chunks.append((chunk_text, section_name, "paragraph")) + current_chunk = [] + current_length = 0 + + current_chunk.append(sentence) + current_length += sentence_length + + # Add final chunk if not empty + if current_chunk: + chunk_text = " ".join(current_chunk) + chunks.append((chunk_text, section_name, "paragraph")) + else: + chunks.append((para.strip(), section_name, "paragraph")) + + return chunks + + def _create_sliding_window_chunks(self, text: str) -> List[Tuple[str, str, str]]: + """ + Create overlapping chunks using a sliding window approach. + + Args: + text: Document text + + Returns: + List of (chunk_text, section_name, level) tuples + """ + sentences = sent_tokenize(text) + chunks = [] + + # If very few sentences, return as one chunk + if len(sentences) <= 3: + return [(text, "full_document", "document")] + + # Calculate stride (number of sentences to slide window) + stride = max(1, (self.chunk_size - self.chunk_overlap) // 20) # Approximate words per sentence + + # Create chunks with sliding window + for i in range(0, len(sentences), stride): + # Determine end index for current window + window_size = min(i + max(3, self.chunk_size // 20), len(sentences)) + + # Get text for current window + window_text = " ".join(sentences[i:window_size]) + + # Detect current section if possible + section_match = self.section_pattern.search(window_text) + section_name = section_match.group(0) if section_match else "sliding_window" + + chunks.append((window_text, section_name, "sliding")) + + return chunks + + def _create_recursive_chunks(self, text: str) -> List[Tuple[str, str, str]]: + """ + Create hierarchical chunks at different levels of granularity. 
+ + Args: + text: Document text + + Returns: + List of (chunk_text, section_name, level) tuples + """ + chunks = [] + + # Level 1: Document-level chunk (if not too large) + if len(text.split()) <= self.chunk_size * 2: + chunks.append((text, "full_document", "document")) + + # Level 2: Section-level chunks + section_matches = list(self.section_pattern.finditer(text)) + + if section_matches: + for i in range(len(section_matches)): + start_pos = section_matches[i].start() + section_name = text[section_matches[i].start():section_matches[i].end()].strip() + + # Determine section end + if i < len(section_matches) - 1: + end_pos = section_matches[i+1].start() + else: + end_pos = len(text) + + section_text = text[start_pos:end_pos].strip() + + # Add section as a chunk + if section_text and len(section_text.split()) <= self.chunk_size: + chunks.append((section_text, section_name, "section")) + + # Level 3: Paragraph-level chunks + paragraphs = re.split(r'\n\s*\n', section_text) + + for j, para in enumerate(paragraphs): + if para.strip() and len(para.split()) <= self.chunk_size: + chunks.append((para.strip(), section_name, "paragraph")) + + # Level 4: Sentence-level chunks for important sentences + if self._contains_important_entities(para): + sentences = sent_tokenize(para) + for sentence in sentences: + if self._contains_important_entities(sentence): + chunks.append((sentence.strip(), section_name, "sentence")) + else: + # No clear sections, fall back to paragraphs and sentences + paragraphs = re.split(r'\n\s*\n', text) + + for para in paragraphs: + if para.strip() and len(para.split()) <= self.chunk_size: + chunks.append((para.strip(), "paragraph", "paragraph")) + + return chunks + def _embed_chunks(self, chunks: List[Document]) -> List[Document]: + """Add embeddings to chunks.""" + for chunk in chunks: + if chunk.page_content: + embedding = self.embedding_model.encode(chunk.page_content) + chunk.metadata['embedding'] = embedding.tolist() + return chunks + + def _hybrid_chunking(self, content: str, metadata: Dict) -> List[Document]: + """Hybrid chunking combining semantic and fixed-size approaches.""" + # First apply semantic chunking + semantic_chunks = self._semantic_chunking(content, metadata) + + # Then apply fixed-size chunking to large semantic chunks + final_chunks = [] + for chunk in semantic_chunks: + if len(chunk.page_content) > self.chunk_size * 2: + # Break down large chunks + sub_chunks = self._fixed_chunking(chunk.page_content, chunk.metadata) + final_chunks.extend(sub_chunks) + else: + final_chunks.append(chunk) + + # Add embeddings to all chunks + final_chunks = self._embed_chunks(final_chunks) + + return final_chunks + + def _semantic_chunking(self, content: str, metadata: Dict) -> List[Document]: + """Chunk based on semantic boundaries (paragraphs, sections).""" + # Split by double newlines for paragraphs + paragraphs = content.split('\n\n') + chunks = [] + current_chunk = "" + + for para in paragraphs: + if len(current_chunk) + len(para) < self.chunk_size: + current_chunk += para + "\n\n" + else: + if current_chunk: + chunks.append(Document( + page_content=current_chunk.strip(), + metadata=metadata.copy() + )) + current_chunk = para + "\n\n" + + if current_chunk: + chunks.append(Document( + page_content=current_chunk.strip(), + metadata=metadata.copy() + )) + + return chunks + + def _fixed_chunking(self, content: str, metadata: Dict) -> List[Document]: + """Fixed-size chunking with overlap.""" + chunks = [] + start = 0 + + while start < len(content): + end = start + 
self.chunk_size + chunk_text = content[start:end] + + # Ensure we don't cut in the middle of a word + if end < len(content) and not content[end].isspace(): + # Find the last space before the end + last_space = chunk_text.rfind(' ') + if last_space > 0: + end = start + last_space + chunk_text = content[start:end] + + chunks.append(Document( + page_content=chunk_text.strip(), + metadata=metadata.copy() + )) + + start = end - self.chunk_overlap + + return chunks + + def _chunk_by_sentences(self, text: str, section_name: str, chunk_size: int) -> List[Tuple[str, str, str]]: + """ + Create chunks by grouping sentences while respecting chunk size. + + Args: + text: Text to chunk + section_name: Name of the section + chunk_size: Maximum chunk size in words + + Returns: + List of (chunk_text, section_name, level) tuples + """ + sentences = sent_tokenize(text) + chunks = [] + current_chunk = [] + current_length = 0 + + for sentence in sentences: + sentence_words = sentence.split() + sentence_length = len(sentence_words) + + # If adding this sentence exceeds chunk size and we already have content + if current_length + sentence_length > chunk_size and current_chunk: + # Save current chunk + chunk_text = " ".join(current_chunk) + chunks.append((chunk_text, section_name, "sentences")) + + # Start new chunk with overlap + # Find a good overlap point that doesn't split mid-thought + overlap_sentences = min(2, len(current_chunk)) + current_chunk = current_chunk[-overlap_sentences:] + current_length = len(" ".join(current_chunk).split()) + + # Add sentence to current chunk + current_chunk.append(sentence) + current_length += sentence_length + + # Add final chunk if not empty + if current_chunk: + chunk_text = " ".join(current_chunk) + chunks.append((chunk_text, section_name, "sentences")) + + return chunks + + def _contains_important_entities(self, text: str) -> bool: + """ + Check if text contains important medical entities. + + Args: + text: Text to check + + Returns: + Boolean indicating presence of important entities + """ + entities = self._extract_medical_entities(text) + return len(entities) > 0 + + def _calculate_chunk_importance(self, text: str, position: int, total_chunks: int) -> float: + """ + Calculate importance score for a chunk based on various factors. 
+ + Args: + text: Chunk text + position: Position in document + total_chunks: Total number of chunks + + Returns: + Importance score between 0 and 1 + """ + # Extract entities and count them + entities = self._extract_medical_entities(text) + entity_count = len(entities) + + # Calculate entity density + word_count = len(text.split()) + entity_density = entity_count / max(1, word_count / 100) + + # Position importance - first and last chunks often contain key information + position_score = 0.0 + if position == 0 or position == total_chunks - 1: + position_score = 0.2 + elif position < total_chunks * 0.2 or position > total_chunks * 0.8: + position_score = 0.1 + + # Check for important keywords + keyword_score = 0.0 + important_keywords = ["significant", "important", "critical", "essential", "key", + "finding", "diagnosis", "recommend", "conclude", "summary"] + for keyword in important_keywords: + if re.search(r"\b" + re.escape(keyword) + r"\b", text, re.IGNORECASE): + keyword_score += 0.05 + keyword_score = min(0.2, keyword_score) + + # Combine scores + importance_score = min(1.0, 0.3 * entity_density + position_score + keyword_score) + + return importance_score + + def _extract_medical_entities(self, text: str) -> Dict[str, List[str]]: + """ + Extract medical entities from text by category. + + Args: + text: Input text + + Returns: + Dictionary of categorized medical entities + """ + categorized_entities = {} + + for category, pattern in self.medical_entity_categories.items(): + category_pattern = re.compile(pattern) + matches = set(m.group(0).lower() for m in category_pattern.finditer(text)) + if matches: + categorized_entities[category] = list(matches) + + return categorized_entities + + def _save_processed_chunks(self, doc_id: str, chunks: List[Dict[str, Any]]): + """ + Save processed chunks to disk for potential reuse. + + Args: + doc_id: Document identifier + chunks: List of processed chunks + """ + try: + import json + + # Create filename + filename = f"{doc_id}_processed.json" + filepath = self.processed_docs_dir / filename + + # Save chunks without embeddings (to save space) + chunks_without_embeddings = [] + for chunk in chunks: + chunk_copy = chunk.copy() + # Remove embedding as it's large and can be regenerated + del chunk_copy["embedding"] + chunks_without_embeddings.append(chunk_copy) + + with open(filepath, 'w') as f: + json.dump(chunks_without_embeddings, f) + + self.logger.info(f"Saved processed chunks to {filepath}") + except Exception as e: + self.logger.warning(f"Failed to save processed chunks: {e}") + + def batch_process_documents(self, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """ + Process a batch of documents. 
+ + Args: + documents: List of dictionaries with 'content' and 'metadata' keys + + Returns: + List of processed document chunks with embeddings + """ + all_processed_chunks = [] + + for doc in documents: + try: + processed_chunks = self.process_document(doc["content"], doc["metadata"]) + all_processed_chunks.extend(processed_chunks) + except Exception as e: + self.logger.error(f"Error processing document: {e}") + # Continue with the next document + continue + + return all_processed_chunks \ No newline at end of file diff --git a/agents/rag_agent/query_processor.py b/agents/rag_agent/query_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..7191a2c0e15ec67e7afd41d88df244784701903b --- /dev/null +++ b/agents/rag_agent/query_processor.py @@ -0,0 +1,142 @@ +import logging +import re +import uuid +from typing import List, Dict, Any, Optional, Tuple, Union +from datetime import datetime + +class QueryProcessor: + """ + Processor for mental health queries with entity extraction and specialty detection. + """ + def __init__(self, config, embedding_model): + self.logger = logging.getLogger(__name__) + self.embedding_model = embedding_model + self.config = config + + # Only mental health entity patterns + self.medical_entity_categories = { + "mental_health": r"(stress|anxiety|depression|suicide|coping|therapy|counseling|mindfulness|panic|self[- ]harm|hopelessness|support group|resilience|burnout|well-being|mental health)", + "symptom": r"(insomnia|mood swings|irritability|concentration issues|memory problems|appetite changes|sleep disturbances|social withdrawal|fatigue|hopelessness)", + "treatment": r"(medication|therapy|cognitive behavioral therapy|CBT|dialectical behavior therapy|DBT|exposure therapy|medication management|psychiatric evaluation|support group|mindfulness training)", + "diagnosis": r"(bipolar disorder|schizophrenia|post-traumatic stress disorder|PTSD|obsessive[- ]compulsive disorder|OCD|generalized anxiety disorder|GAD|major depressive disorder|MDD|panic disorder|social anxiety disorder|SAD)", + "risk_factor": r"(genetic predisposition|family history|trauma|substance abuse|chronic illness|stressful life events|social isolation|poor coping skills|low resilience|lack of support)", + "intervention": r"(cognitive restructuring|exposure therapy|mindfulness meditation|relaxation techniques|stress management|problem-solving skills|assertiveness training|social skills training|self-care strategies|crisis intervention)" + } + + # Only mental health specialty keywords + self.specialty_keywords = { + "psychiatry": [ + "mental health", "depression", "anxiety", "psychiatric", "disorder", + "schizophrenia", "bipolar", "therapy", "behavioral", "psychological", + "stress", "counseling", "mindfulness", "panic", "self-harm", "support group", + "resilience", "burnout", "well-being" + ] + } + + def process_query(self, query: str) -> Dict[str, Any]: + """Process query to extract metadata and intent - NOT THE FULL RAG PIPELINE.""" + try: + # Extract medical entities + entities = self._extract_medical_entities(query) + + # Determine query intent + intent = self._determine_query_intent(query) + + # Normalize query to handle typos and variations + normalized_query = self._normalize_query(query) + + # Determine medical specialty + specialty = self._detect_specialty(query) + + # Create metadata/filters for retrieval + filters = { + 'query_id': str(uuid.uuid4()), + 'timestamp': datetime.now().isoformat(), + 'query_intent': intent, + 'medical_entities': entities, + 'specialty': 
specialty + } + + self.logger.info(f"Processed query with filters: {filters}") + + return { + 'filters': filters, + 'intent': intent, + 'entities': entities, + 'expanded_query': self._expand_query(query) + } + + except Exception as e: + self.logger.error(f"Error processing query: {e}") + return self._get_default_metadata() + + + + def _normalize_query(self, query: str) -> str: + """Normalize query to handle common typos and variations.""" + import re + + # Convert to lowercase + normalized = query.lower() + + # Fix common typos or abbreviations + typo_corrections = { + r'\brad\b': 'road', # rad -> road + r'\bdepressed\b': 'depression', + r'\bim\b': "i'm", + r'\bu\b': 'you', + r'\bur\b': 'your', + } + + for pattern, replacement in typo_corrections.items(): + normalized = re.sub(pattern, replacement, normalized) + + return normalized + + def _expand_query(self, query: str) -> str: + """Expand query with related terms.""" + expansions = { + "depression": "depression mood sadness hopelessness", + "anxiety": "anxiety worry nervousness panic", + "stress": "stress pressure tension burnout", + "therapy": "therapy counseling psychotherapy CBT DBT", + "suicide": "suicide self-harm hopelessness crisis", + "support": "support group counseling help" + } + expanded = query + for term, expansion in expansions.items(): + if re.search(r"\b" + re.escape(term) + r"\b", query.lower()): + expanded = f"{expanded} {expansion}" + return expanded + + def _extract_medical_entities(self, text: str) -> Dict[str, List[str]]: + """Extract categorized medical entities from text.""" + categorized_entities = {} + for category, pattern in self.medical_entity_categories.items(): + category_pattern = re.compile(pattern, re.IGNORECASE) + matches = set(m.group(0).lower() for m in category_pattern.finditer(text)) + if matches: + categorized_entities[category] = list(matches) + return categorized_entities + + def _detect_specialty(self, text: str) -> Optional[str]: + """Detect medical specialty from text.""" + text_lower = text.lower() + for specialty, keywords in self.specialty_keywords.items(): + for keyword in keywords: + if re.search(r"\b" + re.escape(keyword.lower()) + r"\b", text_lower): + return specialty + return None + + def _determine_query_intent(self, text: str) -> str: + """Determine the intent of the query.""" + text_lower = text.lower() + if re.search(r"\b(what is|define|explain|describe|meaning of)\b", text_lower): + return "definition" + elif re.search(r"\b(treat|therapy|medication|cure|manage|drug|prescription)\b", text_lower): + return "treatment" + elif re.search(r"\b(diagnose|diagnostic|symptom|sign|identify|determine)\b", text_lower): + return "diagnosis" + elif re.search(r"\b(prevent|preventive|avoid|risk factor|reduction)\b", text_lower): + return "prevention" + return "general_information" \ No newline at end of file diff --git a/agents/rag_agent/reranker.py b/agents/rag_agent/reranker.py new file mode 100644 index 0000000000000000000000000000000000000000..556911cbeaba0210a3f1abaa79b1e45dd967dc06 --- /dev/null +++ b/agents/rag_agent/reranker.py @@ -0,0 +1,69 @@ +import logging +from typing import List, Dict, Any +from sentence_transformers import CrossEncoder +from langchain_core.documents import Document + +class Reranker: + """ + Reranks retrieved documents using a cross-encoder model for more accurate results. + """ + def __init__(self, config): + """ + Initialize the reranker with configuration. 
+ Args: + config: Configuration object containing reranker settings + """ + self.logger = logging.getLogger(__name__) + try: + if not hasattr(config.rag, "reranker_model"): + raise ValueError("Missing 'reranker_model' in config.rag. Please add it to config/rag.yaml.") + self.model_name = config.rag.reranker_model + self.logger.info(f"Loading reranker model: {self.model_name}") + self.model = CrossEncoder(self.model_name) + self.top_k = getattr(config.rag, "reranker_top_k", 5) + except Exception as e: + self.logger.error(f"Error loading reranker model: {e}") + raise + + def rerank(self, query: str, documents: List[Document], top_k: int = 5) -> List[Document]: + """ + Rerank documents based on relevance to the query. + + Args: + query: The user query + documents: List of Document objects to rerank + top_k: Number of top documents to return + + Returns: + List of reranked Document objects + """ + if not documents: + return [] + + try: + # Create pairs of (query, document_content) for the reranker + pairs = [] + for doc in documents: + # Use doc.page_content instead of doc['content'] + content = doc.page_content if hasattr(doc, 'page_content') else str(doc) + pairs.append([query, content]) + + # Get scores from the reranker + scores = self.model.predict(pairs) + + # Combine documents with their scores + doc_scores = list(zip(documents, scores)) + + # Sort by score (descending) + doc_scores.sort(key=lambda x: x[1], reverse=True) + + # Return top_k documents + reranked_docs = [doc for doc, score in doc_scores[:top_k]] + + self.logger.info(f"Reranked {len(documents)} documents, returning top {len(reranked_docs)}") + return reranked_docs + + except Exception as e: + self.logger.error(f"Error during reranking: {e}") + self.logger.warning("Falling back to original ranking") + return documents[:top_k] \ No newline at end of file diff --git a/agents/rag_agent/response_generator.py b/agents/rag_agent/response_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..bde32c4ade31ffa1d1d71e25f16ddc952a561381 --- /dev/null +++ b/agents/rag_agent/response_generator.py @@ -0,0 +1,326 @@ +# File: agents/rag_agent/response_generator.py + +import logging +from typing import List, Dict, Any, Optional +from langchain_core.documents import Document + +class ResponseGenerator: + """ + Generates structured responses with empathy, solution, and recommendations using RAG pipeline. 
+ """ + def __init__(self, config=None, llm=None): + """Initialize ResponseGenerator with optional config and LLM""" + self.logger = logging.getLogger(__name__) + self.config = config + self.llm = llm + + # Set default values if config is provided + if config and hasattr(config, 'rag'): + self.max_context_length = getattr(config.rag, "max_context_length", 2048) + self.include_sources = getattr(config.rag, "include_sources", True) + else: + self.max_context_length = 2048 + self.include_sources = True + + def generate_response(self, query: str, retrieved_docs: List[Any], + chat_history: Optional[str] = None, + user_emotion: Optional[str] = None, + mental_health_status: Optional[str] = None, + user_context: Optional[Dict] = None) -> Dict[str, Any]: + """Generate structured response using RAG pipeline with guaranteed structure.""" + + try: + print(f"[ResponseGenerator] Processing: {query[:50]}...") + print(f"[ResponseGenerator] Emotion: {user_emotion}, Status: {mental_health_status}") + + # Extract sources from documents + sources = self._extract_sources(retrieved_docs) + + # Build context from retrieved documents + context = self._build_context_from_docs(retrieved_docs) + + # Get user info + emotion = user_emotion or "neutral" + status = mental_health_status or "Unknown" + message_count = user_context.get('message_count', 1) if user_context else 1 + + # Try RAG-enhanced structured response first + try: + print("[ResponseGenerator] Generating RAG-enhanced structured response...") + response_text = self._generate_rag_structured_response( + query, context, emotion, status, message_count + ) + + # Verify structure is present + if self._verify_response_structure(response_text): + print("[ResponseGenerator] RAG response has complete structure") + else: + print("[ResponseGenerator] RAG response missing structure, enhancing...") + response_text = self._enhance_with_guaranteed_structure( + response_text, query, emotion, status + ) + + except Exception as llm_error: + print(f"[ResponseGenerator] LLM generation failed: {llm_error}") + print("[ResponseGenerator] Using guaranteed structured fallback...") + response_text = self._build_structured_response(query, emotion, status) + + confidence = self._calculate_confidence(sources) + + print(f"[ResponseGenerator] Final response: {response_text[:100]}...") + + return { + "response": response_text, + "sources": sources, + "confidence": confidence + } + + except Exception as e: + self.logger.error(f"Error generating response: {e}") + return self._generate_guaranteed_structure(query, user_emotion, mental_health_status, user_context) + + def _build_context_from_docs(self, retrieved_docs: List[Any]) -> str: + """Build context from RAG pipeline retrieved documents.""" + context_parts = [] + + for i, doc in enumerate(retrieved_docs[:3]): + content = "" + if hasattr(doc, 'page_content'): + content = doc.page_content + elif isinstance(doc, dict): + content = doc.get('content', doc.get('page_content', '')) + else: + content = str(doc) + + if content: + # Truncate for context window + truncated_content = content[:400] + "..." if len(content) > 400 else content + context_parts.append(f"[Document {i+1}]\n{truncated_content}") + + return "\n\n".join(context_parts) if context_parts else "No specific context available." 
+ + def _generate_rag_structured_response(self, query: str, context: str, emotion: str, status: str, message_count: int) -> str: + """Generate response using RAG context with structured prompt.""" + + if not self.llm: + # Fallback if no LLM is provided + return self._build_structured_response(query, emotion, status) + + structured_prompt = f"""You are a compassionate mental health support assistant. Using the provided context, create a response with EXACTLY 3 sections: + +USER QUERY: "{query}" +USER EMOTION: {emotion} +MENTAL HEALTH STATUS: {status} +MESSAGE COUNT: {message_count} + +CONTEXT FROM RAG PIPELINE: +{context} + +CRITICAL: Your response MUST have ALL THREE sections in this order: + +1. EMPATHY/ACKNOWLEDGEMENT (Start with "I understand..." or "I hear..." or "I can see..."): + - Acknowledge their specific feelings from the query + - Validate their experience + - Show understanding and support + +2. SOLUTION/INFORMATION (Include words like "can help", "try", "practice", "research shows"): + - Use the context to provide relevant information about their concern + - Explain what might be happening or why they feel this way + - Offer evidence-based insights or coping strategies + +3. RECOMMENDATIONS (Include words like "recommend", "consider", "suggest"): + - Give concrete next steps based on their status ({status}) + - Suggest professional help if needed + - Provide specific actions they can take + +Use the RAG context to make your response more informative and specific. Keep it 6-9 sentences total. Be warm and conversational, not clinical. + +Response:""" + + try: + response = self.llm.invoke(structured_prompt) + return response.content if hasattr(response, 'content') else str(response) + except Exception as e: + print(f"Error invoking LLM: {e}") + return self._build_structured_response(query, emotion, status) + + def _verify_response_structure(self, response_text: str) -> bool: + """Verify the response has all three required sections.""" + + # Check for empathy keywords + has_empathy = any(word in response_text.lower() for word in [ + 'understand', 'hear', 'see', 'sorry', 'valid', 'difficult', 'acknowledge' + ]) + + # Check for solution keywords + has_solution = any(word in response_text.lower() for word in [ + 'try', 'practice', 'can help', 'technique', 'strategy', 'approach', + 'research shows', 'studies', 'evidence' + ]) + + # Check for recommendation keywords + has_recommendations = any(word in response_text.lower() for word in [ + 'recommend', 'consider', 'suggest', 'professional', 'counselor', + 'therapist', 'healthcare' + ]) + + print(f"[Structure Check] Empathy: {has_empathy}, Solution: {has_solution}, Recommendations: {has_recommendations}") + + return has_empathy and has_solution and has_recommendations + + def _enhance_with_guaranteed_structure(self, partial_response: str, query: str, emotion: str, status: str) -> str: + """Enhance partial response to ensure complete structure.""" + + # Analyze what's missing and add it + has_empathy = any(word in partial_response.lower() for word in ['understand', 'hear', 'sorry', 'valid']) + has_solution = any(word in partial_response.lower() for word in ['try', 'practice', 'can help', 'strategy']) + has_recommendations = any(word in partial_response.lower() for word in ['recommend', 'consider', 'suggest']) + + enhanced_parts = [] + + # Add empathy if missing + if not has_empathy: + empathy = self._generate_empathy_section(query, emotion) + enhanced_parts.append(empathy) + + # Add the existing response + 
enhanced_parts.append(partial_response)
+
+        # Add solution if missing
+        if not has_solution:
+            solution = self._generate_solution_section(query, emotion)
+            enhanced_parts.append(solution)
+
+        # Add recommendations if missing
+        if not has_recommendations:
+            recommendations = self._generate_recommendations_section(query, status)
+            enhanced_parts.append(recommendations)
+
+        return " ".join(enhanced_parts)
+
+    def _generate_empathy_section(self, query: str, emotion: str) -> str:
+        """Generate empathy section based on query."""
+        query_lower = query.lower()
+
+        if "stress" in query_lower and ("school" in query_lower or "work" in query_lower):
+            return "I understand that you're feeling overwhelmed by academic/work pressure, and these feelings are completely valid."
+        elif "anxiety" in query_lower:
+            return "I hear that anxiety is making things really challenging for you right now."
+        elif "sad" in query_lower or "depressed" in query_lower:
+            return "I can see that you're going through a difficult time with these heavy feelings."
+        else:
+            return f"I understand that you're dealing with {emotion} feelings, and I want you to know your experience is valid."
+
+    def _generate_solution_section(self, query: str, emotion: str) -> str:
+        """Generate solution section based on query."""
+        query_lower = query.lower()
+
+        if "stress" in query_lower and "school" in query_lower:
+            return "Academic stress can be managed through time management techniques and breaking large tasks into smaller, manageable steps."
+        elif "anxiety" in query_lower:
+            return "Anxiety can be helped through breathing techniques and grounding exercises that activate your body's relaxation response."
+        else:
+            return "There are proven strategies that can help you manage these feelings and improve your well-being over time."
+
+    def _generate_recommendations_section(self, query: str, status: str) -> str:
+        """Generate recommendations based on status and query."""
+        if status == "Severe":
+            return "I strongly recommend reaching out to a mental health professional immediately, and consider calling 988 if you need crisis support."
+        elif "school" in query.lower():
+            return "Consider speaking with a school counselor and practicing stress-reduction techniques like regular breaks and exercise."
+        else:
+            return "I recommend considering professional support and incorporating daily stress-reduction activities into your routine."
+
+    def _extract_sources(self, retrieved_docs: List[Any]) -> List[Dict]:
+        """Extract sources from retrieved documents."""
+        sources = []
+
+        for i, doc in enumerate(retrieved_docs[:3]):
+            if hasattr(doc, 'page_content'):
+                content = doc.page_content
+                metadata = doc.metadata if hasattr(doc, 'metadata') else {}
+                source = metadata.get('source', f'Document {i+1}')
+                score = metadata.get('score', 0.5)
+            elif isinstance(doc, dict):
+                content = doc.get('content', doc.get('page_content', ''))
+                metadata = doc.get('metadata', {})
+                source = metadata.get('source', doc.get('source', f'Document {i+1}'))
+                score = doc.get('score', metadata.get('score', 0.5))
+            else:
+                content = str(doc)
+                source = f'Document {i+1}'
+                score = 0.5
+
+            if content:
+                sources.append({
+                    "source": source,
+                    "score": float(score),
+                    "snippet": content[:100] + "..."
if len(content) > 100 else content + }) + + return sources + + def _build_structured_response(self, query: str, emotion: str, status: str) -> str: + """Build guaranteed structured response (fallback method).""" + + query_lower = query.lower() + + print(f"[StructureBuilder] Building guaranteed structure for: {query_lower[:30]}...") + + # 1. EMPATHY/ACKNOWLEDGMENT + if "sad" in query_lower and ("depressed" in query_lower or "depression" in query_lower): + empathy = "I understand you're going through a really difficult time with sadness and depression. These feelings can be overwhelming and exhausting, and I want you to know that reaching out shows real strength." + elif "stress" in query_lower and ("school" in query_lower or "work" in query_lower): + empathy = "I hear that you're feeling really stressed about your school/work responsibilities. Academic and work pressure can be overwhelming, and it's completely valid to feel this way." + elif "anxiety" in query_lower or "anxious" in query_lower: + empathy = "I understand that anxiety can feel incredibly overwhelming and scary. What you're experiencing is very real, and your struggle with this is completely valid." + else: + empathy = f"I hear that you're dealing with {emotion} feelings, and I want you to know that what you're experiencing is valid and understandable." + + # 2. SOLUTION/INFORMATION + if "stress" in query_lower and ("school" in query_lower or "work" in query_lower): + solution = "Academic and work stress can be managed through time management techniques, breaking large tasks into smaller steps, and practicing stress-reduction activities. Research shows that regular breaks and boundary-setting can help you regain control." + elif "anxiety" in query_lower or "anxious" in query_lower: + solution = "Anxiety is highly treatable through various approaches including breathing techniques, grounding exercises, and cognitive strategies. Practice deep breathing (inhale for 4, hold for 4, exhale for 6) to help activate your body's relaxation response." + elif "sad" in query_lower and ("depressed" in query_lower or "depression" in query_lower): + solution = "Depression involves complex brain chemistry changes that affect mood, energy, and motivation. Research shows that combining professional support with self-care practices can help improve symptoms over time." + else: + solution = "There are proven strategies and techniques that can help you manage these feelings and improve your overall mental well-being through consistent practice and the right support." + + # 3. RECOMMENDATIONS + if status == "Severe": + recommendations = "I strongly recommend reaching out to a mental health professional immediately for proper assessment and support. You can also call the crisis helpline at 988 if you need immediate assistance." + elif "school" in query_lower or "work" in query_lower: + recommendations = "Consider speaking with a counselor about stress management, practice setting boundaries with your workload, and explore stress-reduction activities like regular exercise or meditation that fit your schedule." + else: + recommendations = "Consider speaking with a mental health professional for personalized guidance and support. You might also try incorporating stress-reduction activities like deep breathing exercises, regular physical activity, or journaling into your routine." 
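+
+        # Worked example (hypothetical input): for the query "I'm stressed about work"
+        # with status "Moderate", the branches above select the work/stress empathy line,
+        # the stress-management solution, and the counselor recommendation, which are
+        # joined into a single paragraph below.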
+ + final_response = f"{empathy} {solution} {recommendations}" + print(f"[StructureBuilder] Built guaranteed response with {len(final_response)} characters") + + return final_response + + def _calculate_confidence(self, sources: List[Dict[str, Any]]) -> float: + """Calculate confidence based on sources.""" + if not sources: + return 0.4 + + scores = [s.get('score', 0) for s in sources[:3]] + avg_score = sum(scores) / len(scores) if scores else 0.0 + + if len(sources) >= 3 and avg_score > 0.5: + return min(avg_score * 1.2, 1.0) + + return max(avg_score, 0.4) + + def _generate_guaranteed_structure(self, query: str, emotion: str, status: str, user_context: Dict) -> Dict[str, Any]: + """Generate fallback response with guaranteed structure.""" + + structured_response = self._build_structured_response(query, emotion or "concerned", status or "Unknown") + + return { + "response": structured_response, + "sources": [], + "confidence": 0.4 + } \ No newline at end of file diff --git a/agents/rag_agent/vector_store.py b/agents/rag_agent/vector_store.py new file mode 100644 index 0000000000000000000000000000000000000000..8f6473b177731a91b0a94216db544ea931a112fa --- /dev/null +++ b/agents/rag_agent/vector_store.py @@ -0,0 +1,298 @@ +from typing import List, Dict, Any, Optional, Union +import logging +import uuid +import numpy as np +from qdrant_client import QdrantClient +from qdrant_client.http import models as qdrant_models +from qdrant_client.http.exceptions import UnexpectedResponse + +# Import Document if available, otherwise define a minimal stub +try: + from langchain.schema import Document +except ImportError: + class Document: + def __init__(self, page_content, metadata): + self.page_content = page_content + self.metadata = metadata + +class QdrantRetriever: + """ + Handles storage and retrieval of medical documents using Qdrant vector database. + """ + def __init__(self, config): + """ + Initialize the Qdrant retriever with configuration. 
+ Args: + config: Configuration object containing Qdrant settings + """ + self.logger = logging.getLogger(__name__) + self.collection_name = config.rag.collection_name + self.embedding_dim = config.rag.embedding_dim + self.distance_metric = config.rag.distance_metric + + # Force in-memory mode for now + self.client = QdrantClient(":memory:") + + + # # Initialize Qdrant client + # if getattr(config.rag, "use_local", True): + # self.client = QdrantClient( + # path=config.rag.local_path + # ) + # else: + # self.client = QdrantClient( + # url=getattr(config.rag, "url", None), + # api_key=getattr(config.rag, "api_key", None), + # ) + + # Ensure collection exists + self._ensure_collection() + + + + def _ensure_collection(self): + """Create collection if it doesn't exist.""" + try: + collections = self.client.get_collections().collections + collection_names = [collection.name for collection in collections] + if self.collection_name not in collection_names: + self.logger.info(f"Creating new collection: {self.collection_name}") + self.client.create_collection( + collection_name=self.collection_name, + vectors_config=qdrant_models.VectorParams( + size=self.embedding_dim, + distance=self.distance_metric, + ), + optimizers_config=qdrant_models.OptimizersConfigDiff( + indexing_threshold=10000, + ), + ) + self.logger.info(f"Collection {self.collection_name} created successfully") + except Exception as e: + self.logger.error(f"Error creating collection: {e}") + raise + + def upsert_documents(self, documents: List[Dict[str, Any]]): + """ + Insert or update documents in the vector database. + Args: + documents: List of document dictionaries containing: + - id: Unique identifier + - embedding: Vector embedding + - metadata: Document metadata + - content: Document content + """ + try: + points = [] + for doc in documents: + points.append( + qdrant_models.PointStruct( + id=doc["id"], + vector=doc["embedding"], + payload={ + "content": doc["content"], + "source": doc["metadata"].get("source", ""), + "specialty": doc["metadata"].get("specialty", ""), + "section": doc["metadata"].get("section", ""), + "publication_date": doc["metadata"].get("publication_date", ""), + "medical_entities": doc["metadata"].get("medical_entities", []), + "chunk_number": doc["metadata"].get("chunk_number", 0), + "total_chunks": doc["metadata"].get("total_chunks", 1), + } + ) + ) + self.client.upsert( + collection_name=self.collection_name, + points=points, + wait=True + ) + self.logger.info(f"Successfully upserted {len(documents)} documents") + except Exception as e: + self.logger.error(f"Error upserting documents: {e}") + raise + + # Update the retrieve method to properly return Document objects: + def retrieve(self, query_embedding: np.ndarray, top_k: int = 5, **kwargs) -> List[Document]: + """ + Retrieve similar documents based on query embedding. 
+ + Args: + query_embedding: Query vector + top_k: Number of results to return + **kwargs: Additional parameters (for compatibility) + + Returns: + List of Document objects + """ + try: + # Handle the case where query_embedding might be passed as a dict + if isinstance(query_embedding, dict): + # If it's a dict, it might be from query_processor + # Extract the actual embedding + if 'embedding' in query_embedding: + query_embedding = query_embedding['embedding'] + else: + self.logger.error(f"Invalid query_embedding format: {type(query_embedding)}") + return [] + + # Ensure query_embedding is a list + if isinstance(query_embedding, np.ndarray): + query_vector = query_embedding.tolist() + else: + query_vector = list(query_embedding) + + # Search in Qdrant + search_results = self.client.search( + collection_name=self.collection_name, + query_vector=query_vector, + limit=top_k + ) + + # Convert to Document objects + documents = [] + for result in search_results: + # Extract content from payload + content = result.payload.get('content', '') + + # Create metadata including the score + metadata = {k: v for k, v in result.payload.items() if k != 'content'} + metadata['score'] = result.score + metadata['id'] = str(result.id) + + # Create Document object + doc = Document( + page_content=content, + metadata=metadata + ) + documents.append(doc) + + self.logger.info(f"Retrieved {len(documents)} documents for query") + return documents + + except Exception as e: + self.logger.error(f"Error retrieving documents: {e}") + import traceback + self.logger.error(f"Traceback: {traceback.format_exc()}") + return [] + + def delete_documents(self, document_ids: List[Union[str, int]]): + """ + Delete documents from the vector database by their IDs. + Args: + document_ids: List of document IDs to delete + """ + try: + self.client.delete( + collection_name=self.collection_name, + points_selector=qdrant_models.PointIdsList( + points=document_ids + ), + wait=True + ) + self.logger.info(f"Successfully deleted {len(document_ids)} documents") + except Exception as e: + self.logger.error(f"Error deleting documents: {e}") + raise + + def wipe_collection(self): + """Completely remove and recreate the collection for fresh start.""" + try: + self.client.delete_collection(self.collection_name) + self._ensure_collection() + self.logger.info(f"Collection {self.collection_name} wiped and recreated") + except Exception as e: + self.logger.error(f"Error wiping collection: {e}") + raise + + # Add this method to the QdrantRetriever class: + + def get_collection_info(self) -> Dict: + """Get information about the collection.""" + try: + collection = self.client.get_collection(self.collection_name) + return { + "name": self.collection_name, + "vectors_count": collection.vectors_count, + "points_count": collection.points_count, + "status": collection.status, + "config": { + "size": collection.config.params.vectors.size, + "distance": collection.config.params.vectors.distance + } + } + except Exception as e: + self.logger.error(f"Error getting collection info: {e}") + return {"error": str(e), "vectors_count": 0} + + def get_collection_stats(self) -> Dict[str, Any]: + """ + Retrieve statistics of the collection. + Returns: + Dictionary containing collection statistics. 
+ """ + try: + stats = self.client.get_collection(self.collection_name) + self.logger.info(f"Collection stats retrieved successfully: {stats}") + return stats.model_dump() + except Exception as e: + self.logger.error(f"Error getting collection stats: {e}") + raise + + + # Add these methods to the QdrantRetriever class: + + def add_documents(self, documents: List[Document]) -> int: + """ + Add documents to the vector store. + + Args: + documents: List of documents with embeddings + + Returns: + Number of documents added + """ + if not documents: + return 0 + + points = [] + for idx, doc in enumerate(documents): + if not hasattr(doc, 'metadata') or 'embedding' not in doc.metadata: + self.logger.warning(f"Document {idx} missing embedding, skipping") + continue + + point_id = str(uuid.uuid4()) + embedding = doc.metadata['embedding'] + + # Remove embedding from metadata before storing + metadata = {k: v for k, v in doc.metadata.items() if k != 'embedding'} + metadata['content'] = doc.page_content + + points.append( + qdrant_models.PointStruct( + id=point_id, + vector=embedding, + payload=metadata + ) + ) + + if points: + self.client.upsert( + collection_name=self.collection_name, + points=points + ) + self.logger.info(f"Added {len(points)} documents to vector store") + + return len(points) + + + + def clear_collection(self): + """Clear all documents from the collection.""" + try: + # Delete and recreate the collection + self.client.delete_collection(self.collection_name) + self._create_collection() + self.logger.info(f"Collection {self.collection_name} cleared successfully") + except Exception as e: + self.logger.error(f"Error clearing collection: {e}") + raise \ No newline at end of file diff --git a/agents/shared_rag.py b/agents/shared_rag.py new file mode 100644 index 0000000000000000000000000000000000000000..c917fd5b92a08744db38597cba7d2b518a6ecf08 --- /dev/null +++ b/agents/shared_rag.py @@ -0,0 +1,151 @@ +import logging +import threading +import time +from sentence_transformers import SentenceTransformer +from langchain_google_genai import ChatGoogleGenerativeAI +from config.config import Config +from agents.rag_agent import MedicalRAG + +logger = logging.getLogger(__name__) + +class LazyEmbeddingModel: + """Lazy loading wrapper for SentenceTransformer model""" + def __init__(self, model_name="all-MiniLM-L6-v2"): + self.model_name = model_name + self._model = None + + def _ensure_loaded(self): + if self._model is None: + logger.info(f"📊 Loading embedding model: {self.model_name}...") + self._model = SentenceTransformer(self.model_name) + logger.info("✅ Embedding model loaded successfully") + return self._model + + def encode(self, *args, **kwargs): + model = self._ensure_loaded() + return model.encode(*args, **kwargs) + + def __getattr__(self, name): + # Delegate all other attributes to the actual model + model = self._ensure_loaded() + return getattr(model, name) + +class SharedRAG: + """Singleton RAG instance for sharing across FastAPI and Flask backends.""" + + _instance = None + _lock = threading.Lock() + _initialized = False + _initialization_time = None + + def __new__(cls): + if cls._instance is None: + with cls._lock: + if cls._instance is None: + cls._instance = super(SharedRAG, cls).__new__(cls) + return cls._instance + + def __init__(self): + if not self._initialized: + current_time = time.time() + logger.info("🚀 Initializing shared RAG instance...") + + try: + # Initialize config + self.config = Config() + + # Initialize models (lazy loading for memory optimization) + 
logger.info("📊 Embedding model will be loaded when needed...") + self.embedding_model = LazyEmbeddingModel("all-MiniLM-L6-v2") + + logger.info("🤖 Loading LLM...") + self.llm = ChatGoogleGenerativeAI( + model="gemini-2.0-flash", + temperature=0.1, + max_tokens=1024 + ) + + # Initialize RAG (will get embedding model when needed) + logger.info("📚 Initializing MedicalRAG...") + # Pass the lazy embedding model + self.rag = MedicalRAG(self.config, self.llm, self.embedding_model) + + # Ensure knowledge is ingested + self._ensure_knowledge_ingested() + + SharedRAG._initialized = True + SharedRAG._initialization_time = current_time + logger.info(f"✅ Shared RAG instance ready in {time.time() - current_time:.2f}s") + + except Exception as e: + logger.error(f"❌ Error initializing SharedRAG: {str(e)}") + raise e + + def get_embedding_model(self): + """Get the embedding model (lazy loading wrapper)""" + return self.embedding_model + + def _ensure_knowledge_ingested(self): + """Ensure knowledge base is populated.""" + try: + # Check if collection has documents + collection_info = self.rag.retriever.get_collection_info() + existing_vectors = collection_info.get('points_count', 0) + + logger.info(f"📋 Collection status: {existing_vectors} documents") + + if existing_vectors == 0: + logger.info("📥 Empty collection, ingesting knowledge...") + result = self.rag.ingest_knowledge_folder("knowledge") + logger.info(f"✅ Ingestion complete: {result.get('successful', 0)} files processed") + else: + # Test retrieval with a simple query using lazy-loaded embedding model + logger.info("✅ Knowledge base ready (testing embedding on-demand)") + # The embedding model will load automatically when needed + + except Exception as e: + logger.error(f"❌ Error checking/ingesting knowledge: {e}") + import traceback + logger.error(traceback.format_exc()) + + def get_rag(self): + """Get the RAG instance.""" + current_time = time.time() + + # Check if recently initialized (within 30 seconds) + if (self._initialized and + self._initialization_time and + (current_time - self._initialization_time) < 30): + logger.info(f"⚡ RAG ready ({current_time - self._initialization_time:.1f}s ago)") + + return self.rag + + def reingest_knowledge(self): + """Force re-ingestion of knowledge base.""" + try: + logger.info("🔄 Force reingesting knowledge...") + self.rag.retriever.clear_collection() + result = self.rag.ingest_knowledge_folder("knowledge") + logger.info(f"✅ Reingestion complete: {result}") + return result + except Exception as e: + logger.error(f"❌ Error reingesting knowledge: {e}") + raise + + def get_status(self): + """Get system status for debugging.""" + try: + collection_info = self.rag.retriever.get_collection_info() + return { + "initialized": self._initialized, + "initialization_time": self._initialization_time, + "collection_points": collection_info.get('points_count', 0), + "crewai_enabled": getattr(self.rag, 'crewai_enabled', False), + "embedding_model": str(type(self.embedding_model)), + "llm_model": str(type(self.llm)) + } + except Exception as e: + return {"error": str(e)} + +# Create singleton instance +shared_rag_instance = SharedRAG() \ No newline at end of file diff --git a/agents/web_search_processor_agent/__init__.py b/agents/web_search_processor_agent/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2c198afef50947cc9961fdc07f08dd9ef540201a --- /dev/null +++ b/agents/web_search_processor_agent/__init__.py @@ -0,0 +1,25 @@ +from typing import List, Dict, Any, Optional +from 
.web_search_processor import WebSearchProcessor + +class WebSearchProcessorAgent: + """ + Agent responsible for processing web search results with structured responses. + """ + + def __init__(self, config=None): + self.web_search_processor = WebSearchProcessor(config) + + def process_web_search_results(self, query: str, chat_history: Optional[List[Dict[str, str]]] = None, + user_context: Optional[Dict] = None) -> str: + """ + Process web search results and return structured response. + + Args: + query: User query + chat_history: Previous conversation history + user_context: User context (emotion, mental health status, etc.) + + Returns: + Structured empathy + solution + recommendations response + """ + return self.web_search_processor.process_query(query, user_context) \ No newline at end of file diff --git a/agents/web_search_processor_agent/__pycache__/__init__.cpython-311.pyc b/agents/web_search_processor_agent/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fbbb641be69683d33c5430540a70c2bcc41fee8 Binary files /dev/null and b/agents/web_search_processor_agent/__pycache__/__init__.cpython-311.pyc differ diff --git a/agents/web_search_processor_agent/__pycache__/geminai_search.cpython-311.pyc b/agents/web_search_processor_agent/__pycache__/geminai_search.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..663aca68b6002b55dad5c74e36ee04d0b24f78df Binary files /dev/null and b/agents/web_search_processor_agent/__pycache__/geminai_search.cpython-311.pyc differ diff --git a/agents/web_search_processor_agent/__pycache__/pubmed_search.cpython-311.pyc b/agents/web_search_processor_agent/__pycache__/pubmed_search.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..338c5e37f55c35f7aaed80b232c5cb03d1f9079f Binary files /dev/null and b/agents/web_search_processor_agent/__pycache__/pubmed_search.cpython-311.pyc differ diff --git a/agents/web_search_processor_agent/__pycache__/tavily_search.cpython-311.pyc b/agents/web_search_processor_agent/__pycache__/tavily_search.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..993722cac4e3e182d7d728462db2bd72dc150436 Binary files /dev/null and b/agents/web_search_processor_agent/__pycache__/tavily_search.cpython-311.pyc differ diff --git a/agents/web_search_processor_agent/__pycache__/web_search_agent.cpython-311.pyc b/agents/web_search_processor_agent/__pycache__/web_search_agent.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..73fb7d5d2eb29c07d27796478d3053ca13c4428e Binary files /dev/null and b/agents/web_search_processor_agent/__pycache__/web_search_agent.cpython-311.pyc differ diff --git a/agents/web_search_processor_agent/__pycache__/web_search_processor.cpython-311.pyc b/agents/web_search_processor_agent/__pycache__/web_search_processor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87ba2b80804000efb3fec65346e199c4e05f4f16 Binary files /dev/null and b/agents/web_search_processor_agent/__pycache__/web_search_processor.cpython-311.pyc differ diff --git a/agents/web_search_processor_agent/geminai_search.py b/agents/web_search_processor_agent/geminai_search.py new file mode 100644 index 0000000000000000000000000000000000000000..136067195fc576365117a0ffd1c5ff7a1585c8b0 --- /dev/null +++ b/agents/web_search_processor_agent/geminai_search.py @@ -0,0 +1,162 @@ +import os +from typing import List, Dict, Optional +import 
google.generativeai as genai
+
+class GeminiSearchAgent:
+    """
+    Searches for mental health information using Google's Gemini AI.
+    """
+    def __init__(self):
+        """Initialize the Gemini search agent."""
+        # Read the API key from the environment; never hard-code credentials
+        api_key = os.environ.get("GOOGLE_API_KEY")
+        if not api_key:
+            raise ValueError("GOOGLE_API_KEY environment variable is not set")
+        genai.configure(api_key=api_key)
+
+        # Use gemini-1.5-flash, which is currently available
+        self.model = genai.GenerativeModel('gemini-1.5-flash')
+
+        # List available models (for debugging)
+        try:
+            models = genai.list_models()
+            print("Available models:")
+            for model in models:
+                if 'generateContent' in model.supported_generation_methods:
+                    print(f"  - {model.name}")
+        except Exception:
+            pass
+
+    def search_mental_health(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
+        """
+        Search for mental health information using Gemini AI.
+
+        Args:
+            query: Search query string
+            max_results: Maximum number of results to return
+
+        Returns:
+            List of dictionaries containing search results
+        """
+        try:
+            # Enhance query for mental health context
+            enhanced_query = f"""
+            As a mental health information assistant, provide reliable information about: {query}
+
+            Focus on:
+            1. Evidence-based mental health resources
+            2. Professional medical sources and recent research
+            3. Practical advice and coping strategies
+            4. Treatment options and therapeutic approaches
+
+            Please provide specific, actionable information that would be helpful for someone seeking mental health support.
+            Format the response with clear sections if applicable.
+            """
+
+            # Generate content
+            response = self.model.generate_content(enhanced_query)
+
+            # Extract the response text
+            main_response = response.text if response.text else "No response generated"
+
+            # Format as a list of results
+            results = [{
+                "title": "Mental Health Information",
+                "content": main_response,
+                "source": "Google Gemini AI",
+                "type": "ai_summary"
+            }]
+
+            return results
+
+        except Exception as e:
+            print(f"Error details: {str(e)}")
+            return [{
+                "title": "Error",
+                "content": f"Error retrieving information: {str(e)}",
+                "source": "Error",
+                "type": "error"
+            }]
+
+    def search_specific_topics(self, query: str, topics: List[str]) -> Dict[str, str]:
+        """
+        Search for specific mental health topics.
+
+        Args:
+            query: Base query
+            topics: List of specific topics to include
+
+        Returns:
+            Dictionary with topic-specific information
+        """
+        results = {}
+
+        for topic in topics:
+            specific_query = f"""
+            Provide brief, evidence-based information about: {query} specifically regarding {topic}.
+            Keep the response concise but informative, focusing on practical advice and current best practices.
+            """
+            try:
+                response = self.model.generate_content(specific_query)
+                content = response.text if response.text else "No information available"
+                results[topic] = content
+
+            except Exception as e:
+                results[topic] = f"Error: {str(e)}"
+
+        return results
+
+
+# For backward compatibility with existing code
+class PubmedSearchAgent(GeminiSearchAgent):
+    """Alias for GeminiSearchAgent to maintain compatibility."""
+
+    def search_pubmed(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
+        """
+        Wrapper method to maintain compatibility with existing code.
+        Redirects to Gemini search.
+ """ + # Get results from Gemini + gemini_results = self.search_mental_health(query, max_results) + + # Transform to expected format + formatted_results = [] + for i, result in enumerate(gemini_results): + formatted_results.append({ + "title": result.get("title", "Information from Gemini AI"), + "abstract": result.get("content", "")[:500] + "..." if len(result.get("content", "")) > 500 else result.get("content", ""), + "url": "", # No direct URL with Gemini + "authors": "Google Gemini AI", + "publication_date": "Current", + "full_content": result.get("content", ""), + }) + + return formatted_results + + +# Test function +if __name__ == "__main__": + # Test Gemini search + agent = GeminiSearchAgent() + + print("\nTesting Gemini Search Agent...") + print("-" * 60) + + # Test query + query = "cognitive behavioral therapy for depression" + results = agent.search_mental_health(query) + + for result in results: + print(f"\nTitle: {result['title']}") + print(f"Source: {result['source']}") + print(f"Content: {result['content'][:500]}...") + + # Test specific topics + print("\n" + "=" * 60) + print("Testing specific topics...") + topics_results = agent.search_specific_topics( + "anxiety management", + ["breathing exercises", "medication options", "lifestyle changes"] + ) + + for topic, content in topics_results.items(): + print(f"\n{topic.upper()}:") + print(content[:300] + "...") \ No newline at end of file diff --git a/agents/web_search_processor_agent/web_search_agent.py b/agents/web_search_processor_agent/web_search_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..ab1c48b5fa5b96579af19725f6c833b48e3f42de --- /dev/null +++ b/agents/web_search_processor_agent/web_search_agent.py @@ -0,0 +1,125 @@ +from typing import Dict, List +import sys +import os + +# Add the parent directory to the path for imports +sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +# Import both Gemini and PubMed agents +try: + from .geminai_search import GeminiSearchAgent, PubmedSearchAgent +except ImportError: + from agents.web_search_processor_agent.geminai_search import GeminiSearchAgent, PubmedSearchAgent + +class WebSearchAgent: + """ + Agent responsible for retrieving real-time medical information from web sources. + Uses Gemini AI for general queries and maintains PubMed compatibility. + """ + + def __init__(self, config=None): + """Initialize the web search agent.""" + # Initialize both search agents + self.gemini_agent = GeminiSearchAgent() + self.pubmed_search_agent = PubmedSearchAgent() # This is actually GeminiSearchAgent too + self.config = config + + def search(self, query: str) -> str: + """ + Perform searches using Gemini AI. + + Args: + query: Search query string + + Returns: + Formatted search results + """ + print(f"[WebSearchAgent] Searching with Gemini for: {query}") + + try: + # Use Gemini search for better results + results = self.gemini_agent.search_mental_health(query) + + # Format the results + formatted_results = self._format_gemini_results(results) + + return formatted_results + + except Exception as e: + print(f"[WebSearchAgent] Error during search: {str(e)}") + # Fallback to basic response + return self._get_fallback_response(query) + + def _format_gemini_results(self, results: List[Dict[str, str]]) -> str: + """Format Gemini results for display.""" + if not results: + return "No relevant information found." 
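+
+        # Each result is expected to follow the shape produced by
+        # GeminiSearchAgent.search_mental_health, e.g.
+        #   {"title": "Mental Health Information", "content": "...",
+        #    "source": "Google Gemini AI", "type": "ai_summary"}
+        # or, on failure, {"title": "Error", ..., "type": "error"}, handled first below.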
+ + # Check for error results + if results[0].get("type") == "error": + return results[0].get("content", "Error occurred during search") + + formatted_output = [] + + for result in results: + if result.get("type") == "ai_summary": + # Format AI-generated content + content = result.get("content", "") + formatted_output.append(content) + else: + # Format other types of results + title = result.get("title", "Information") + content = result.get("content", "") + source = result.get("source", "Unknown") + + formatted_output.append(f"**{title}**\n*Source: {source}*\n\n{content}") + + return "\n\n".join(formatted_output) + + def _get_fallback_response(self, query: str) -> str: + """Provide a fallback response when search fails.""" + return f"""I apologize, but I couldn't retrieve specific information about "{query}" at this moment. + +Here are some general suggestions: +1. Consider consulting with a mental health professional +2. Visit reputable mental health websites like NIMH or WHO +3. Speak with your healthcare provider for personalized advice + +Would you like to rephrase your question or ask about something else?""" + + def search_mental_health_specific(self, query: str) -> str: + """ + Search specifically for mental health related content. + + Args: + query: Search query string + + Returns: + Formatted search results with mental health focus + """ + # The GeminiSearchAgent already adds mental health context + return self.search(query) + + def search_with_topics(self, query: str, topics: List[str]) -> str: + """ + Search for specific topics using Gemini. + + Args: + query: Base query + topics: List of specific topics + + Returns: + Formatted results by topic + """ + try: + results = self.gemini_agent.search_specific_topics(query, topics) + + formatted = [f"**Information about {query}:**\n"] + for topic, content in results.items(): + formatted.append(f"\n### {topic.title()}\n{content}") + + return "\n".join(formatted) + + except Exception as e: + print(f"[WebSearchAgent] Error in topic search: {str(e)}") + return self._get_fallback_response(query) \ No newline at end of file diff --git a/agents/web_search_processor_agent/web_search_processor.py b/agents/web_search_processor_agent/web_search_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..03276c9554ba9dd34a92572d49d1329a4011a617 --- /dev/null +++ b/agents/web_search_processor_agent/web_search_processor.py @@ -0,0 +1,116 @@ +import os +from typing import Dict, List, Optional + +try: + from .web_search_agent import WebSearchAgent +except ImportError: + from agents.web_search_processor_agent.web_search_agent import WebSearchAgent + +class WebSearchProcessor: + """ + Processes web search results using structured empathy + solution + recommendations format. + """ + + def __init__(self, config=None): + self.config = config + try: + self.web_search_agent = WebSearchAgent() + except Exception as e: + print(f"⚠️ Web search agent not available: {e}") + self.web_search_agent = None + + def process_query(self, query: str, user_context: Optional[Dict] = None) -> str: + """ + Process query with structured empathy + solution + recommendations format. 
+ + Args: + query: User query + user_context: User context including emotion, mental_health_status, name + + Returns: + Structured response string + """ + try: + query_lower = query.lower() + user_context = user_context or {} + + emotion = user_context.get("emotion", "neutral") + mental_health_status = user_context.get("mental_health_status", "Unknown") + user_name = user_context.get("name", "there") + + # Generate structured response based on query content + if any(word in query_lower for word in ["depressed", "depression", "sad"]): + empathy = f"I can hear that you're going through a really difficult time with sadness and depression, {user_name}. Those feelings can be overwhelming and exhausting, and I want you to know that what you're experiencing is completely valid." + solution = "Depression often involves changes in brain chemistry that affect mood, energy, and motivation. Professional treatment through therapy and/or medication has proven very effective for most people experiencing depression." + recommendations = "I strongly recommend reaching out to a mental health professional who can provide proper assessment and treatment. In the meantime, try to maintain basic self-care routines, connect with supportive people in your life, and consider contacting the National Mental Health Program at 1717 if you need immediate support." + + elif any(word in query_lower for word in ["anxious", "anxiety", "worried", "panic"]): + empathy = f"I understand that anxiety can feel incredibly overwhelming and scary, {user_name}. Those racing thoughts and physical sensations are very real and can be exhausting to deal with." + solution = "Anxiety is one of the most treatable mental health conditions. Techniques like deep breathing, grounding exercises, and cognitive behavioral therapy have strong research support for managing anxiety symptoms." + recommendations = "Try practicing 4-7-8 breathing (inhale 4, hold 7, exhale 8), limit caffeine intake, and consider speaking with a counselor who specializes in anxiety treatment. Regular exercise and mindfulness can also significantly help." + + elif any(word in query_lower for word in ["stress", "overwhelm", "pressure"]): + empathy = f"It sounds like you're dealing with a lot of stress right now, {user_name}. That pressure can really take a toll on your mental and physical well-being." + solution = "Stress management involves both addressing the source of stress and building your capacity to handle pressure. Identifying specific stressors and developing coping strategies can make a significant difference." + recommendations = "Try breaking down overwhelming tasks into smaller steps, practice saying no to additional commitments, and schedule regular breaks. Consider stress-reduction techniques like meditation, exercise, or talking to a therapist." + + elif any(word in query_lower for word in ["lonely", "alone", "isolated"]): + empathy = f"I hear that you're feeling lonely, {user_name}. Loneliness can be very difficult to experience, and you're showing strength by reaching out." + solution = "Loneliness is a common human experience that can be addressed through building meaningful connections and developing a support network." + recommendations = "Consider joining community groups, volunteering, or participating in activities you enjoy. Online support groups can also provide connection. If loneliness persists, talking to a counselor can help develop strategies for building relationships." 
+ + elif any(word in query_lower for word in ["sleep", "tired", "exhausted", "insomnia"]): + empathy = f"It sounds like you're having difficulties with sleep or feeling tired, {user_name}. Sleep issues can significantly impact mental health and daily functioning." + solution = "Sleep problems often have both physical and mental health components. Good sleep hygiene and addressing underlying stress or anxiety can improve sleep quality." + recommendations = "Try maintaining a consistent sleep schedule, limiting screen time before bed, and creating a relaxing bedtime routine. Avoid caffeine late in the day. If sleep problems persist, consider consulting a healthcare provider." + + else: + # Generic structured response + empathy = f"Thank you for reaching out and sharing what you're going through, {user_name}. I can sense that you're dealing with some challenges right now, and I want you to know that your experience matters." + solution = "There are evidence-based strategies and resources available that can help you manage these feelings and improve your mental health over time." + recommendations = "Consider speaking with a mental health professional for personalized support, practice daily self-care activities, and don't hesitate to reach out to trusted friends or family members for additional support." + + # Combine the structured response + structured_response = f"{empathy}\n\n{solution}\n\n{recommendations}" + + # Add crisis resources for severe cases + if (mental_health_status in ["Severe", "Crisis"] or + any(word in query_lower for word in ["suicide", "kill myself", "want to die", "hurt myself"])): + crisis_addition = "\n\n**🆘 Immediate Support Available:** If you're having thoughts of self-harm, please contact the National Mental Health Program at 1717 (24/7) or Emergency Services at 112 immediately." + structured_response += crisis_addition + + return structured_response + + except Exception as e: + print(f"[WebSearchProcessor] Error: {e}") + return f"I'm here to support you, {user_context.get('name', 'there')}, though I'm having some technical difficulties. Please know that whatever you're going through, there are people and resources available to help. For immediate support in Bhutan, contact the National Mental Health Program at 1717." + + def process_web_results(self, query: str, chat_history: Optional[List[Dict[str, str]]] = None) -> str: + """ + Legacy method for compatibility. + """ + # Build context from chat history + user_context = {} + if chat_history and len(chat_history) > 0: + # Extract user context from recent messages + recent_messages = chat_history[-3:] if len(chat_history) > 3 else chat_history + for msg in recent_messages: + if msg.get("role") == "user": + content = msg.get('content', '').lower() + # Simple emotion detection + if any(word in content for word in ['sad', 'depressed', 'down']): + user_context['emotion'] = 'sad' + elif any(word in content for word in ['anxious', 'worried', 'panic']): + user_context['emotion'] = 'anxious' + elif any(word in content for word in ['angry', 'frustrated', 'mad']): + user_context['emotion'] = 'angry' + + return self.process_query(query, user_context) + +# Convenience function for direct use +def search_mental_health_info(query: str, user_context: Optional[Dict] = None) -> str: + """ + Direct function to search for mental health information. 
+ """ + processor = WebSearchProcessor() + return processor.process_query(query, user_context) \ No newline at end of file diff --git a/chat_sessions/chat_Harsh Narayan_20250802_165208.json b/chat_sessions/chat_Harsh Narayan_20250802_165208.json new file mode 100644 index 0000000000000000000000000000000000000000..d7b241e3553d660afb8e13fdbed166f0a53778ae --- /dev/null +++ b/chat_sessions/chat_Harsh Narayan_20250802_165208.json @@ -0,0 +1,24 @@ +{ + "session_id": "20250802_165208", + "user_name": "Harsh Narayan", + "messages": [ + { + "content": "Now I'm upset.", + "role": "user", + "timestamp": "2025-08-02T16:52:08.426564", + "topics": [] + }, + { + "agent": "RAG_AGENT", + "content": "I understand you're feeling upset right now. It's completely valid to experience a range of emotions, and I'm here to listen without judgment. It takes courage to acknowledge when you're feeling this way.\n\nResearch shows that sometimes upsetting feelings can arise from internal thoughts and judgments, as one source mentions getting angry and critical with oneself. It can help to practice simply listening to the voice of your thoughts without immediately judging them or trying to shut them down. This can create some space between you and the upsetting feelings.\n\nI recommend considering what might have triggered this feeling and if you can identify any specific thoughts or situations connected to it. If these feelings persist or become overwhelming, I suggest reaching out to a mental health professional for further support. Remember, taking care of your mental well-being is essential.\n", + "role": "assistant", + "timestamp": "2025-08-02T16:52:12.085676", + "topics": [ + "depression" + ] + } + ], + "metadata": { + "topics": [] + } +} \ No newline at end of file diff --git a/chat_sessions/chat_Harsh Narayan_20250804_163618.json b/chat_sessions/chat_Harsh Narayan_20250804_163618.json new file mode 100644 index 0000000000000000000000000000000000000000..ddb31dd70870382d7d3c2ee3d3f036a23a79b3b3 --- /dev/null +++ b/chat_sessions/chat_Harsh Narayan_20250804_163618.json @@ -0,0 +1,27 @@ +{ + "session_id": "20250804_163618", + "user_name": "Harsh Narayan", + "messages": [ + { + "content": "I am failing sad, what should I do?", + "role": "user", + "timestamp": "2025-08-04T16:36:18.921585", + "topics": [ + "depression" + ] + }, + { + "agent": "RAG_AGENT", + "content": "I understand you're feeling sad about failing, and that's a really tough feeling to experience. It's completely valid to feel down when things don't go as planned. I'm here to offer support.\n\nRemembering the opposite of your negative feelings can help; if things feel unpleasant, try to focus on peace and relaxation, letting go of the need to do anything. Research shows that being mindful of when you get carried away by negative thoughts and stopping yourself can also be beneficial. Living each day with the intention of bringing joy and kindness can also help shift your perspective.\n\nI recommend considering practicing these techniques when you feel overwhelmed. Since I don't know your full situation, I suggest reaching out to a mental health professional for personalized support and guidance. 
They can help you explore these feelings further and develop coping strategies.\n", + "role": "assistant", + "timestamp": "2025-08-04T16:36:21.540544", + "topics": [ + "depression", + "coping" + ] + } + ], + "metadata": { + "topics": [] + } +} \ No newline at end of file diff --git a/chat_sessions/chat_string_string.json b/chat_sessions/chat_string_string.json new file mode 100644 index 0000000000000000000000000000000000000000..c7d9c1f49ae25f0654f8499b7a9d3e07fa521a36 --- /dev/null +++ b/chat_sessions/chat_string_string.json @@ -0,0 +1,8 @@ +{ + "session_id": "string", + "user_name": "string", + "messages": [ + {} + ], + "metadata": {} +} \ No newline at end of file diff --git a/config/__init__.py b/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/config/__pycache__/__init__.cpython-311.pyc b/config/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc022652162a3e4a4ff7e575511376ac3fccf104 Binary files /dev/null and b/config/__pycache__/__init__.cpython-311.pyc differ diff --git a/config/__pycache__/config.cpython-311.pyc b/config/__pycache__/config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db7261bb4a9866c8b2fe3fa7ba257c4794338ae4 Binary files /dev/null and b/config/__pycache__/config.cpython-311.pyc differ diff --git a/config/agents.yaml b/config/agents.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37992a45310aa637fb54beb0dd428ea6602aaa73 --- /dev/null +++ b/config/agents.yaml @@ -0,0 +1,42 @@ +emotion_detector: + role: Emotion Detector + goal: Analyze user input to determine their emotional state. + backstory: You are an empathetic AI skilled at identifying emotions like sadness, anxiety, or happiness from text. + memory: true + verbose: false + +suggestion_agent: + role: Suggestion Provider + goal: Offer 1-2 brief and actionable suggestions to support the user's mental well-being. + backstory: You are a compassionate advisor who shares short mental health tips like breathing exercises, journaling, but also offers to gently guide the user through them if they seem open to it. + memory: true + verbose: false + +safety_agent: + role: Safety Monitor + goal: Check user input for signs of crisis or self-harm and respond appropriately. + backstory: You are a vigilant guardian trained to detect red flags like mentions of self-harm, prioritizing user safety. + memory: true + verbose: false + +orchestrator: + role: Workflow Orchestrator + goal: Coordinate agent tasks to ensure a cohesive and empathetic response, prioritizing safety if needed. + backstory: You are a strategic coordinator ensuring the chatbot responds with clarity and empathy, tailoring response length naturally. + memory: true + verbose: false + +rag_retriever: + role: Knowledge Retriever + goal: Retrieve relevant mental health tips from the text corpus based on the user's emotional state. + backstory: You are a librarian who finds concise, relevant resources using a custom RAG pipeline. + memory: true + verbose: false + +rag_reader: + role: Knowledge Summarizer + goal: Summarize retrieved tips clearly and briefly for the user's emotional state. + backstory: You are an editor who distills helpful advice into brief, clear summaries. 
+ memory: true + verbose: false + diff --git a/config/config.py b/config/config.py new file mode 100644 index 0000000000000000000000000000000000000000..e3578da55ee818d70eedef3fa8ac2ff57fb7421a --- /dev/null +++ b/config/config.py @@ -0,0 +1,164 @@ +""" +Configuration loader for the Mental Health Chatbot +""" + +import os +import yaml +from dataclasses import dataclass +from typing import Any, Dict, Optional +from pathlib import Path + +from langchain_google_genai import ChatGoogleGenerativeAI +from langchain_google_genai import GoogleGenerativeAIEmbeddings +from sentence_transformers import SentenceTransformer + + +@dataclass +class RAGConfig: + """Configuration for RAG agent""" + def __init__(self, config_dict: Dict[str, Any]): + self.config_dict = config_dict + self.llm = ChatGoogleGenerativeAI( + model="gemini-1.5-flash", + temperature=0.3, + google_api_key=os.environ.get("GOOGLE_API_KEY") + ) + + # Load from YAML if available + if 'rag' in config_dict: + rag_config = config_dict['rag'] + self.embedding_dim = rag_config.get('embedding_dim', 384) + + # Lazy load SentenceTransformer for memory optimization + self.embedding_model = None + self.embedding_model_name = "all-MiniLM-L6-v2" + + self.collection_name = rag_config.get('collection_name', 'mental_health_docs') + self.chunk_size = rag_config.get('chunk_size', 256) + self.chunk_overlap = rag_config.get('chunk_overlap', 32) + self.reranker_model = rag_config.get('reranker_model', 'cross-encoder/ms-marco-MiniLM-L-6-v2') + self.reranker_top_k = rag_config.get('reranker_top_k', 5) + self.max_context_length = rag_config.get('max_context_length', 2048) + self.include_sources = rag_config.get('include_sources', True) + self.use_local = rag_config.get('use_local', True) + self.url = rag_config.get('url', 'http://localhost:6333') + self.distance_metric = rag_config.get('distance_metric', 'Cosine') # Changed to 'Cosine' + self.min_retrieval_confidence = rag_config.get('min_retrieval_confidence', 0.85) + + # Add missing attributes + self.processed_docs_dir = rag_config.get('processed_docs_dir', 'processed_docs') + self.knowledge_dir = rag_config.get('knowledge_dir', 'knowledge') + else: + # Default values if no YAML config + self.embedding_dim = 384 + self.embedding_model = None + self.embedding_model_name = "all-MiniLM-L6-v2" + self.collection_name = 'mental_health_docs' + self.chunk_size = 256 + self.chunk_overlap = 32 + self.reranker_model = 'cross-encoder/ms-marco-MiniLM-L-6-v2' + self.reranker_top_k = 5 + self.max_context_length = 2048 + self.include_sources = True + self.use_local = True + self.url = 'http://localhost:6333' + self.distance_metric = 'Cosine' # Changed to 'Cosine' + self.min_retrieval_confidence = 0.85 + self.processed_docs_dir = 'processed_docs' + self.knowledge_dir = 'knowledge' + + self.context_limit = 4 + + def get_embedding_model(self): + """Lazy load the embedding model only when needed""" + if self.embedding_model is None: + try: + from sentence_transformers import SentenceTransformer + self.embedding_model = SentenceTransformer(self.embedding_model_name) + print(f"✅ Embedding model loaded: {self.embedding_model_name}") + except Exception as e: + print(f"⚠️ Failed to load embedding model: {e}") + return None + return self.embedding_model + + +@dataclass +class ConversationConfig: + """Configuration for Conversation agent""" + def __init__(self): + self.llm = ChatGoogleGenerativeAI( + model="gemini-1.5-flash", + temperature=0.7, + google_api_key=os.environ.get("GOOGLE_API_KEY") + ) + + +@dataclass +class 
WebSearchConfig: + """Configuration for Web Search agent""" + def __init__(self): + self.context_limit = 4 + self.llm = ChatGoogleGenerativeAI( + model="gemini-1.5-flash", + temperature=0.5, + google_api_key=os.environ.get("GOOGLE_API_KEY") + ) + # Add Tavily API key configuration + self.tavily_api_key = os.environ.get("TAVILY_API_KEY", "tvly-your-api-key-here") + + +@dataclass +class AgentDecisionConfig: + """Configuration for Agent Decision system""" + def __init__(self): + self.llm = ChatGoogleGenerativeAI( + model="gemini-2.0-flash", + temperature=0, + google_api_key=os.environ.get("GOOGLE_API_KEY") + ) + + +class Config: + """Main configuration class that loads from YAML files""" + + def __init__(self): + # API keys must come from the environment; never commit real keys + os.environ.setdefault("GOOGLE_API_KEY", "your-google-api-key-here") + os.environ.setdefault("TAVILY_API_KEY", "tvly-your-api-key-here") + + # Load YAML configurations + self.config_dict = self._load_yaml_configs() + + # Initialize configurations + self.rag = RAGConfig(self.config_dict) + self.conversation = ConversationConfig() + self.web_search = WebSearchConfig() + self.agent_decision = AgentDecisionConfig() + + # General settings + self.max_conversation_history = 20 + + def _load_yaml_configs(self) -> Dict[str, Any]: + """Load all YAML configuration files""" + config_dict = {} + config_dir = Path(__file__).parent + + # Load each YAML file + yaml_files = ['agents.yaml', 'rag.yaml', 'tasks.yaml'] + for yaml_file in yaml_files: + file_path = config_dir / yaml_file + if file_path.exists(): + with open(file_path, 'r') as f: + data = yaml.safe_load(f) + if data: + config_dict.update(data) + + return config_dict + + def get_agent_config(self, agent_name: str) -> Dict[str, Any]: + """Get configuration for a specific agent""" + return self.config_dict.get(agent_name, {}) + + def get_task_config(self, task_name: str) -> Dict[str, Any]: + """Get configuration for a specific task""" + return self.config_dict.get(task_name, {}) \ No newline at end of file
diff --git a/config/rag.yaml b/config/rag.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3009fd2cb8605abc9a7bc4b045d2f363a464ca6d --- /dev/null +++ b/config/rag.yaml @@ -0,0 +1,26 @@ +rag: + collection_name: "mental_health_docs" + embedding_model: "all-MiniLM-L6-v2" + embedding_dim: 384 + chunk_size: 256 + chunk_overlap: 32 + processed_docs_dir: "processed_docs" + knowledge_dir: "knowledge" + + # Qdrant settings + use_local: true + url: "http://localhost:6333" + distance_metric: "Cosine" + + # Optimized reranker settings for speed + reranker_model: "cross-encoder/ms-marco-MiniLM-L-6-v2" + reranker_top_k: 3 # Reduced from 5 to 3 for faster processing + + # Response generation + max_context_length: 1024 # Reduced from 2048 for faster processing + response_format_instructions: "" + include_sources: true + min_retrieval_confidence: 0.5 # Increased from 0.4 for better quality/speed trade-off + top_k: 3 # Reduced from 5 for faster retrieval + max_context_docs: 3 # Reduced for faster processing + 
diff --git a/config/tasks.yaml b/config/tasks.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3690660135a491fd02bd765029ef0ea9ed1d7e9c --- /dev/null +++ b/config/tasks.yaml @@ -0,0 +1,79 @@ +emotion_detection_task: + description: > + Analyze the user's input: "{user_input}" + Identify their primary emotion (e.g., sadness, anxiety, neutral) and provide a short emotional summary. + expected_output: > + A short emotion label, e.g., "User feels anxious." 
+ output_key: emotion_summary + agent: emotion_detector + +safety_check_task: + description: > + Review the user's message and conversation context. + Determine whether it contains signs of emotional distress or crisis using your mental health knowledge base. + Respond in JSON format: + { + "distress": true/false, + "trigger": "quoted text from user input if applicable" + } + expected_output: > + Structured JSON with distress flag and trigger phrase if any. + output_key: safety_result + agent: safety_agent + context: + - emotion_detection_task + +rag_retrieval_task: + description: > + Based on the user's emotional state, retrieve concise mental health tips from the corpus. + expected_output: > + 1–2 short tips, e.g., "Take a walk in nature. Try deep breathing exercises." + output_key: retrieved_tips + agent: rag_retriever + context: + - emotion_detection_task + +rag_reading_task: + description: > + Summarize the tips retrieved in a helpful and brief tone. + expected_output: > + A 1-2 line practical summary of the tips. + output_key: summarized_tips + agent: rag_reader + context: + - emotion_detection_task + - rag_retrieval_task + +suggestion_task: + description: > + Give 1-2 practical, empathetic suggestions based on emotional state and retrieved tips. + If appropriate, gently offer further help such as: + "Would you like me to walk you through a breathing exercise?" or + "Shall I guide you with some mindfulness steps?" + expected_output: > + 1-2 short actionable suggestions (1-2 lines), optionally followed by a gentle offer for guided support. + E.g., "Try a short breathing exercise. Take a break from screens. Would you like me to walk you through one?" + output_key: suggestions + agent: suggestion_agent + context: + - emotion_detection_task + - safety_check_task + - rag_retrieval_task + - rag_reading_task + +orchestrate_task: + description: > + Respond to: "{user_input}" with an adaptive tone and length. + - For greetings or neutral input: be short and friendly. + - For moderate emotions (e.g., anxiety, stress): respond with 5-6 concise lines. + - For crisis-level input: give a longer, supportive message with clear help suggestions. + expected_output: > + A concise, human-like response adapted to the emotional tone and urgency. + output_key: final_response + agent: orchestrator + context: + - emotion_detection_task + - safety_check_task + - rag_retrieval_task + - rag_reading_task + - suggestion_task diff --git a/crew_ai/README.md b/crew_ai/README.md new file mode 100644 index 0000000000000000000000000000000000000000..07ad074b1e6dc22f9784f00f80a07786534807f9 --- /dev/null +++ b/crew_ai/README.md @@ -0,0 +1,164 @@ +# DrukCare +A skeleton for CrewAI agents for providing mental health assistance. +## Tagline: Empowering Mental Well-being with Intelligent and Culturally Sensitive Support. + +# 1. About +DrukCare AI is an intelligent chatbot application designed to provide empathetic and personalized mental health assistance, specifically tailored for the context of Bhutan. Leveraging the CrewAI framework, this system orchestrates a team of specialized AI agents to guide users through various stages of support, from crisis detection and profile collection to dynamic mental health assessments and personalized recommendations. + +The project aims to offer accessible, initial mental health guidance, respecting user privacy and cultural nuances, while adhering to ethical guidelines. + +# 2. Features + +1. Crisis Detection: Identifies suicidal ideation or mental health emergencies. +2. 
Condition Classification: Detects depression, anxiety, substance use, or general mental health concerns. +3. Interactive Assessments: Conducts PHQ-9, GAD-7, and DAST-10 questionnaires, plus questionnaires for alcohol use and bipolar disorder. +4. Personalized Recommendations: Offers suggestions including helplines, therapy options, and mindfulness and meditation practices. +5. RAG-Driven Retrieval: Retrieves relevant documents using a vector store (PostgreSQL pgvector) hosted on Supabase. +6. User Profile Integration: Adapts recommendations using user history and preferences. + +# 3. Workflow + +The DrukCare AI operates as a sequential CrewAI process, ensuring a structured and coherent user interaction flow (a code sketch of this sequence follows section 4.1): + +1. User inputs a query. +2. Crisis detection checks for emergencies. +3. User profile is retrieved or mocked. +4. Condition classifier suggests a condition and assessment. +5. User confirms and takes the questionnaire. +6. Score is calculated and interpreted. +7. Final recommendation is generated (optionally enhanced via RAG). + +![Flow diagram](data/flow_chart.png) + +## Crisis Detection: + +Input: User's initial query. + +Action: The Crisis Detection Specialist agent analyzes the input for emergency signs. A trained Hugging Face model is used as a tool to classify the query as crisis or no-crisis. + +Output: Crisis or No-crisis. + +## User Profile Collection: + +Input: User's query and status from Crisis Detection. + +Action: Queries the user profile from the PostgreSQL database. + +Output: A structured user profile in JSON format. + +## Knowledge Retrieval & Query Refinement (RAG): + +Input: User's initial query and collected user profile. + +Action: The Knowledge Base Manager & Query Refiner agent interprets the user's intent, formulating specific keywords for the vector database. It then retrieves relevant mental health information and identifies a potential condition (e.g., 'depression', 'anxiety'). + +Output: Relevant mental health recommendations and the identified condition. + +## Conditional Assessment: + +Input: Identified condition from the mental condition classifier crew. + +Action: The Mental Health Assessment Specialist agent determines if an assessment is relevant (e.g., PHQ-9 for depression, GAD-7 for anxiety). If relevant, it seeks explicit user consent. If consent is given, it administers the questionnaire step-by-step. + +Output: Assessment status (completed, skipped, denied) and results (score, interpretation) if completed. + +## Personalized Recommendation: + +Input: Original user query, collected user profile, RAG results, and assessment results. + +Action: The Personalized Recommendation Engine synthesizes all gathered information to generate highly personalized, empathetic, and actionable mental health recommendations, culturally adapted for Bhutan. + +Output: The final comprehensive recommendation to the user. + +# 4. Architecture/Components +The application is built using the CrewAI framework, comprising Agents, Tasks, and Tools. + +## 4.1. Agents + +1. **CrisisDetectionAgent:** Detects urgent crisis signals in user queries. +2. **Mental Condition Classifier Agent:** Identifies the likely mental condition. +3. **Data Retriever Agent:** Fetches user demographic and mental health background. +4. **RecommendationAgent:** Synthesizes all information and provides actionable recommendations. +5. **RAG Agent:** Retrieves external documents to augment the response context (RAG).
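A minimal sketch of the sequential workflow in section 3, using the exportable crew functions defined in `crew_ai/chatbot.py` (the query string and profile ID are illustrative):

    import json
    from crew_ai.chatbot import (
        run_crisis_check,
        run_user_profile_retrieval,
        run_condition_classification,
        run_recommendations,
    )

    query = "I have been feeling anxious for weeks"    # illustrative input
    crisis = run_crisis_check(query)                   # step 2: crisis gate
    if not crisis.get("is_crisis", False):
        profile = run_user_profile_retrieval(query, "anon_user")               # step 3
        condition = run_condition_classification(query, json.dumps(profile))   # step 4
        rec = run_recommendations(query, json.dumps(profile),
                                  condition.get("condition", "general"),
                                  "{}", "N/A", is_crisis="false")              # step 7
        print(rec["recommendation"])

Assessment steps 5-6 are interactive and therefore omitted here; see `full_chat_flow` in `crew_ai/chatbot.py` for the complete loop.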
## 4.2. Tasks

1. *Crisis detection task:* The crisis condition is detected using the 'crisis classifier tool'. This task is performed by the Crisis Detection agent. Input is the 'user_query'. +2. *Mental condition classifier task:* This classifies the mental health condition from the 'user_query' (e.g., anxiety, depression, substance abuse) and recommends the matching questionnaire (for example, if the detected mental health condition is 'anxiety', the 'GAD-7' questionnaire is recommended to the user). This step is optional for the user. This task is performed by the Mental Condition Classifier Agent. +3. *Data retriever task:* This retrieves the user profile, given the 'user_profile_id', from the database. This task is performed by the Data Retriever Agent, which has access to a tool for this purpose. +4. *RAG task:* This first extracts keywords from the user query, refines the query if needed, and fetches documents from the vector DB using semantic search. +5. *Recommendation task:* This provides a comprehensive recommendation based on the user profile, the detected condition, assessment results, and the documents retrieved from the vector database. It also has a tool to fetch helplines for crisis situations and for severe mental health conditions. The agent performing this task is the Recommendation Agent. + +## 4.3. Crews + +1. **Crisis_management_crew** (crisis detection agent): Runs crisis detection logic. +2. **Mental_condition_crew** (mental condition classifier agent): Classifies user's mental health condition. +3. **Data_retrieval_crew** (data retriever agent): Retrieves and formats user profile data. +4. **Recommendation_crew** (rag agent + recommendation agent): Generates final recommendations using inputs from other agents and RAG. + + +## 4.4 Tools + +1. *Bhutanese Helplines:* Provides a predefined list of mental health helplines relevant to Bhutan. + +2. *Vector Database Operations:* Retrieval from the vector database for mental health recommendations. Uses the 'sentence-transformers/all-MiniLM-L6-v2' model for embedding queries (see the sketch after this list). + +3. *Data Retrieval:* Fetches user profiles from the PostgreSQL database. It currently contains some dummy user profiles. The features include: age, gender, city_region, profession, marital status, previous mental diagnosis, ethnicity. + +4. *Crisis Classifier:* Uses the 'sentinet/suicidality' model to detect crisis conditions. Note: the 'lmsdmn/crisis-detection-model' has been fine-tuned on a specific dataset; to use it, a new tool would need to be defined.
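A minimal sketch of the embedding step these tools rely on (the model name comes from this README; the pgvector lookup itself is elided):

    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    vector = model.encode("coping strategies for anxiety")
    print(vector.shape)  # (384,) - this vector is matched against stored document
                         # embeddings (e.g., by cosine distance) to fetch top-k passages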
# 5. Usage + +1. Create a Python virtual environment. + +2. Install the dependencies from requirements.txt by running the below command in your terminal: + + pip install -r requirements.txt + +3. Then, run the below command in your terminal: + + python chatbot.py + +The console output will show the detailed steps of how agents interact, tools are used, and the final recommendations are generated for each simulated user input. + +## 5.1. LLM API Key Setup +Crucially, DrukCare AI relies on a Language Model (LLM) to function. + +You need to set up your LLM provider's API key. For example, if you are using OpenAI: + +1. Obtain an API key from your chosen LLM provider (e.g., OpenAI API Keys). + +2. Set it as an environment variable: + + export OPENAI_API_KEY="YOUR_API_KEY_HERE" # On macOS/Linux + Or for Windows (in Command Prompt): + set OPENAI_API_KEY="YOUR_API_KEY_HERE" + In PowerShell: + $env:OPENAI_API_KEY="YOUR_API_KEY_HERE" + +Alternatively, you can hardcode it in your script (for local testing only, not recommended for production): + + os.environ["OPENAI_API_KEY"] = "YOUR_API_KEY_HERE" + +3. Make sure your selected LLM matches the model_name you are using. + +## 5.2 Setting up the connection to Supabase + +Set the SUPABASE_DB_URI in your .env file. The SUPABASE_DB_URI can be requested from Alaa or Adelia. + +## 5.3 LangSmith setup for Tracing and Monitoring + +Prerequisites: + +1. LangSmith Account and Project: Ensure you have a LangSmith account and a project where your LLM runs are being traced. Setting the LANGSMITH_TRACING=true and LANGSMITH_PROJECT environment variables usually handles this. +2. API Key: Have your LANGSMITH_API_KEY ready. +3. LLM Provider API Key: Since your evaluator will be an LLM, you'll need the API key for the LLM provider you plan to use for the evaluation (e.g., OPENAI_API_KEY, ANTHROPIC_API_KEY). You'll need to add these as "Secrets" in LangSmith (Settings -> Secrets -> Add Secret). + + +# Disclaimer + +This DrukCare AI chatbot is designed for informational and initial supportive purposes only. It is not a substitute for professional medical advice, diagnosis, or treatment. Always seek the advice of a qualified mental health professional for any questions you may have regarding a medical condition. If you are in a crisis situation, please contact the provided helplines immediately. + +# License + +Will be updated later.
diff --git a/crew_ai/__init__.py b/crew_ai/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/crew_ai/__pycache__/__init__.cpython-311.pyc b/crew_ai/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c921b34675ac671596b5ffe83dee9b0fe101105d Binary files /dev/null and b/crew_ai/__pycache__/__init__.cpython-311.pyc differ diff --git a/crew_ai/__pycache__/chatbot.cpython-311.pyc b/crew_ai/__pycache__/chatbot.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6b371ff1d883410d27cb914d4b78211340713fe Binary files /dev/null and b/crew_ai/__pycache__/chatbot.cpython-311.pyc differ diff --git a/crew_ai/__pycache__/config.cpython-311.pyc b/crew_ai/__pycache__/config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d4ca376d3e49d8cde1eea438b452e231187fb08 Binary files /dev/null and b/crew_ai/__pycache__/config.cpython-311.pyc differ diff --git a/crew_ai/__pycache__/data_retriever_util.cpython-311.pyc b/crew_ai/__pycache__/data_retriever_util.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7b6d6d7def99a192b125632ae07a86cb9d2ec6c Binary files /dev/null and b/crew_ai/__pycache__/data_retriever_util.cpython-311.pyc differ diff --git a/crew_ai/__pycache__/fastapi_integration.cpython-311.pyc b/crew_ai/__pycache__/fastapi_integration.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3fcc734b182d6598efa329ec7f429cd63fdc348c Binary files /dev/null and b/crew_ai/__pycache__/fastapi_integration.cpython-311.pyc differ diff --git a/crew_ai/__pycache__/full_chat_flow.cpython-311.pyc b/crew_ai/__pycache__/full_chat_flow.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..f62ac5c55f8d0ce42643e43fce989847f43a76b6 Binary files /dev/null and b/crew_ai/__pycache__/full_chat_flow.cpython-311.pyc differ diff --git a/crew_ai/__pycache__/llm_setup.cpython-311.pyc b/crew_ai/__pycache__/llm_setup.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..969b0660e2858075cd448771ef0341bd14e3ab10 Binary files /dev/null and b/crew_ai/__pycache__/llm_setup.cpython-311.pyc differ diff --git a/crew_ai/__pycache__/questionnaire.cpython-311.pyc b/crew_ai/__pycache__/questionnaire.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2f1b961d18c7ac81fc747ee44613046eeddd4e0c Binary files /dev/null and b/crew_ai/__pycache__/questionnaire.cpython-311.pyc differ diff --git a/crew_ai/__pycache__/tools.cpython-311.pyc b/crew_ai/__pycache__/tools.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b1756d317a138f1b801535ed9e2899fd7abc21a2 Binary files /dev/null and b/crew_ai/__pycache__/tools.cpython-311.pyc differ
diff --git a/crew_ai/chatbot.py b/crew_ai/chatbot.py new file mode 100644 index 0000000000000000000000000000000000000000..350be7449773223e970e874aad31de3ba97c9905 --- /dev/null +++ b/crew_ai/chatbot.py @@ -0,0 +1,362 @@ +import json +from dotenv import load_dotenv +from crewai import Agent, Task, Crew +from pydantic import BaseModel, Field +# from langsmith import traceable +from textwrap import dedent +from typing import Optional + +# Load environment variables +load_dotenv() + +# ======================= CONFIGURATION ======================= +from crew_ai.tools import MentalHealthTools, QueryVectorStoreTool, DataRetrievalTool, CrisisClassifierTool, MentalConditionClassifierTool +from crew_ai.llm_setup import get_llm +from crew_ai.questionnaire import load_questionnaires, conduct_assessment +from crew_ai.config import get_config + +# Load config values +config = get_config() + +# LLM Initialization +llm = get_llm() + +# Tool Setup +mental_health_tools = MentalHealthTools() +crisis_classifier_tool = CrisisClassifierTool() +query_vector_store = QueryVectorStoreTool() +data_retriever_tool = DataRetrievalTool() +mental_condition_classifier_tool = MentalConditionClassifierTool() + +# ======================= ASSESSMENT QUESTIONNAIRES ======================= +QUESTIONS = load_questionnaires() + +# ======================= OUTPUT SCHEMAS ======================= +class CrisisDetectionOutput(BaseModel): + is_crisis: bool = Field(description="True if the query indicates a mental health crisis.") + explanation: str = Field(description="Reason for classifying as crisis or not.") + +class MentalConditionOutput(BaseModel): + condition: str = Field(description="The diagnosed mental condition or concern.") + rationale: str = Field(description="Why the classification was made.") + +class DataRetrievalOutput(BaseModel): + id: int = Field(description="User profile ID.") + name: str = Field(description="User name") + age: int = Field(description="User age") + gender: str = Field(description="Gender of user") + city_region: str = Field(description="City or region of user") + profession: str = Field(description="Profession of user") + marital_status: str = Field(description="Marital status of user") + previous_mental_diagnosis: str = Field(description="Previously diagnosed mental health conditions of user") + ethnicity: str = Field(description="Ethnicity of user") + +class RecommendationResult(BaseModel): + recommendation: Optional[str] +
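+# Illustrative sketch (not part of the original file): the output_json schemas
+# above can also validate agent results manually, e.g.:
+#   parsed = CrisisDetectionOutput.model_validate(
+#       {"is_crisis": False, "explanation": "No self-harm indicators found."}
+#   )
+#   parsed.is_crisis  # -> False  (pydantic v2 API; on pydantic v1 use .parse_obj)
+
+# =======================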
AGENT FACTORY ======================= +def create_agent(role: str, goal: str, backstory: str, tools=None, **kwargs) -> Agent: + return Agent( + role=role, + goal=goal, + backstory=backstory, + tools=tools or [], + llm=llm, + verbose=True, + allow_delegation=False, + **kwargs + ) + +# ======================= AGENTS ======================= +crisis_detection_agent = create_agent( + "Crisis Detection Specialist", + "Identify immediate crisis situations and escalate if needed.", + "You are a highly empathetic and vigilant AI assistant trained to detect signs of "\ + "severe distress, suicidal ideation, or other mental health emergencies. "\ + "Your primary responsibility is to classify the query as a crisis or no-crisis situation using the tool you have. "\ + "If the tool output indicates 'is_crisis=True', then it is a CRISIS situation; otherwise it is a NO CRISIS situation.", + tools=[crisis_classifier_tool] +) + +mental_condition_classifier_agent = create_agent( + "Mental Health Condition Classifier", + "Classify the user's mental health concern or condition, specifically aiming to identify the relevant questionnaire based on the condition detected.", + "You are a meticulous AI specialized in understanding various mental health "\ + "states. You analyze user input, identify keywords for stress, anxiety, depression, substance abuse, bipolar disorder, alcohol use, etc., "\ + "and categorize the current concern, with a preference for matching it to a standard assessment "\ + "like PHQ-9 (depression), GAD-7 (anxiety), DAST-10 (substance abuse), AUDIT (alcohol use) or Bipolar (bipolar syndrome), falling back to 'General Well-being' or 'Other', using the tool you have."\ + """ **Tool Usage:** + If the confidence score from the tool's result is greater than 0.5, only then classify the query as the condition returned from the tool. + Otherwise, classify the query based on your own knowledge. + + If the tool's classification is deemed unreliable (score <= 0.5), analyze the text manually based on your understanding of mental health conditions and identify relevant questionnaires: + + - **Depression:** PHQ-9 + - **Anxiety:** GAD-7 + - **Substance Abuse:** DAST-10 + - **Alcohol Use:** AUDIT + - **Bipolar Syndrome:** Bipolar Disorder Assessment + - **General Well-being:** Other mental health concerns + """, + tools=[mental_condition_classifier_tool] +) + +data_retriever_agent = create_agent( + "User Profile Data Retriever", + "Retrieve user profile details using the provided user profile ID. Use the data retrieval tool to fetch the user profile.", + "You are responsible for fetching the user profile from the database if the user exists in the session. If the user is anonymous, then use the default user profile.", + tools=[data_retriever_tool] +) + +rag_agent = create_agent( + "Knowledge Base Manager & Query Refiner", + "Interpret user queries, formulate specific search terms, and manage/query the mental health knowledge base using RAG.", + "You are responsible for intelligently understanding user needs, even from vague inputs. "\ + "You will formulate precise search queries or identify relevant mental health keywords "\ + "before efficiently retrieving relevant information from the vector database. 
"\ + "You ensure that the knowledge base is always up-to-date and accessible for generating "\ + "informed recommendations, and that relevant information is always found, even for general queries.", + tools=[query_vector_store] # This tool performs PostgreSQL vector search +) + +recommendation_agent = create_agent( + "Personalized Recommendation Generator", + "Provide tailored mental health recommendations based on all gathered information, including questionnaire interpretation.", + "You are a compassionate and knowledgeable AI dedicated to offering "\ + "actionable and personalized advice. You synthesize user queries, "\ + "profile data, assessment answers, and the interpretation from assessments "\ + "to deliver helpful recommendations, including suggesting professional help when appropriate.", + tools=[mental_health_tools.get_bhutanese_helplines], + reasoning=True +) + +# ======================= TASKS ======================= +crisis_detection_task = Task( + description="Analyze the user's current query: '{user_query}' to determine if it indicates a mental health crisis or emergency."\ + "Use the crisis detection tool to analyze the text. "\ + "Look for indicators such as: suicidal ideation, self-harm, immediate danger, severe distress, emergency situations. ", + expected_output="Strict JSON with keys is_crisis (bool) and explanation (string). No markdown, no commentary.", + output_json=CrisisDetectionOutput, + input_variables=["user_query"], + agent=crisis_detection_agent +) + +mental_condition_classification_task = Task( + description="Analyze the user's query '{user_query}' and user profile '{user_profile}' to classify their mental health condition.", + expected_output="Strict JSON with keys condition (string) and rationale fields (string).", + output_json=MentalConditionOutput, + input_variables=["user_query", "user_profile"], + agent=mental_condition_classifier_agent +) + +data_retriever_task = Task( + description="Fetch user profile data in structured JSON."\ + "Use Data Retrieval tool to retrieve the user profile information with input as '{user_profile_id}'.", + expected_output="User demographic and background profile as Strict JSON.", + input_variables=["user_query", "user_profile_id"], + output_json = DataRetrievalOutput, + agent=data_retriever_agent +) + +rag_task = Task( + description=dedent(""" + Retrieve top 3 relevant documents for the query: '{user_query}' and condition: '{classified_condition}' + + Guidelines: + 1. Analyze the '{user_query}'. If it is general or vague (e.g., 'I'm feeling down', 'I need some advice'), + use your intelligence to formulate a more specific query or identify potential mental health keywords + (e.g., 'stress', 'anxiety', 'depression', 'general well-being') that reflect the user's potential + underlying condition. Prioritize keywords present in the vector database's . If the query is already specific, use it directly. + 2. The output should be relevant information blocks from the knowledge base. + based on the refined query. 
+ """), + expected_output="A JSON object with a list of relevant texts under key 'docs'", + input_variables=["user_query", "classified_condition"], + agent=rag_agent +) + +recommendation_task = Task( + description=dedent(""" + Generate comprehensive, personalized mental health recommendations based on all available information: + - User query: '{user_query}' + - User profile: '{user_profile}' + - Identified condition: '{classified_condition}' + - Assessment results: '{assessment_answers}' with interpretation: '{interpretation}' + - Crisis status: '{is_crisis}' + + + Guidelines: + 1. Provide culturally sensitive recommendations aligned with Bhutanese values and Gross National Happiness principles. + 2. Include actionable, practical steps the user can take. + 3. Consider the user's specific profile (age, background, history). + 4. Summarize the retrieved documents in your context. + 4. Use this summary to support your recommendation. Reference specific sources used from your context'. + 6. If assessment was completed, incorporate the interpretation. DO NOT reveal the assessment results to the user. + 7. For crisis situations, prioritize immediate safety and professional help. + 8. Use compassionate, encouraging language. + 9. Suggest community resources, traditional practices, and professional help as appropriate. + 10. Only provide helplines when specifically needed for crisis situations. + + Structure your response as a comprehensive recommendation that addresses the user's specific needs. + """), + expected_output="A comprehensive, personalized, and empathetic mental health recommendation tailored to the user's specific situation.", + agent=recommendation_agent, + context=[rag_task], + output_json=RecommendationResult, + input_variables=["user_query", "user_profile","classified_condition", "assessment_answers", "interpretation", "is_crisis"], +) + +# ======================= CREWS ======================= +crisis_management_crew = Crew(agents=[crisis_detection_agent], tasks=[crisis_detection_task], verbose=True) +mental_condition_crew = Crew(agents=[mental_condition_classifier_agent], tasks=[mental_condition_classification_task], verbose=True) +data_retrieval_crew = Crew(agents=[data_retriever_agent], tasks=[data_retriever_task], verbose=True) +recommendation_crew = Crew(agents=[rag_agent, recommendation_agent], tasks=[rag_task, recommendation_task], full_output=True, output_log_file=True, verbose=True) + +# ======================= EXPORTABLE API ======================= +def run_crisis_check(user_query: str) -> dict: + result = crisis_management_crew.kickoff({"user_query": user_query}) + return result.json_dict + +def run_condition_classification(user_query: str, user_profile: str) -> dict: + condition_result = mental_condition_crew.kickoff({ + "user_query": user_query, + "user_profile": user_profile + }) + return condition_result.json_dict + +def run_user_profile_retrieval(user_query: str, user_profile_id: str) -> dict: + data_result = data_retrieval_crew.kickoff({ + "user_query": user_query, + "user_profile_id": user_profile_id + }) + return data_result.json_dict + +def run_recommendations(user_query: str, user_profile: str, condition: str, answers: str, interpretation: str, is_crisis: str): + recommendation_result = recommendation_crew.kickoff({ + "user_query": user_query, + "user_profile": user_profile, + "classified_condition": condition, + "assessment_answers": answers, + "interpretation": interpretation, + "is_crisis": is_crisis + }) + return recommendation_result.json_dict + +# 
+# ======================= FULL CHAT FLOW ======================= +# @traceable(name="DrukCare Chatbot full flow") +def full_chat_flow(user_query: str, user_id: str = "anon_user"): + print("📄 Fetching user profile...") + dummy_profile = { + "id": user_id, + "age": "", + "location": "", + "history": "", + "preferences": "Prefers meditation" + } + + print("🔍 Checking for crisis...") + crisis_result = run_crisis_check(user_query) + print("📦 Crew result:", crisis_result) + + is_crisis = crisis_result.get("is_crisis", False) + explanation = crisis_result.get("explanation", "") + + if is_crisis: + print(f"🚨 Crisis Detected: {explanation}") + rec = run_recommendations( + user_query, + user_profile=json.dumps(dummy_profile), + condition="Crisis", + answers="{}", + interpretation="N/A", + is_crisis="true" + ) + # task_outputs = rec.tasks_output + # retrieved_docs_crisis = task_outputs[0] + + print("\n🆘 Crisis Support Recommendation:\n", rec) + return { + "recommendation": rec["recommendation"], + "score_interpretation": "N/A", + "condition": "Crisis", + "is_crisis": is_crisis, + "crisis_explanation": explanation, + # "retrieved_docs": retrieved_docs_crisis + } + else: + print("No crisis detected") + + print("🔎 Classifying condition...") + condition_result = run_condition_classification(user_query, json.dumps(dummy_profile)) + condition = condition_result.get("condition", "general").lower() + questionnaire_name = config["CONDITION_TO_QUESTIONNAIRE"].get(condition) + + print(f"🧠 Detected condition: {condition}") + if questionnaire_name not in QUESTIONS: + print("Skipping assessment as the condition is general or unknown.") + score = "N/A" + answers = {} + interpretation = "Not applicable" + else: + # Ask for user confirmation + confirm = input(f"👉 We recommend a brief '{questionnaire_name}' assessment (e.g., the {condition.upper()} questionnaire). Do you want to proceed? (yes/no): ").strip().lower() + + if confirm != 'yes': + print("🚫 Assessment skipped by user.") + score = 'N/A' + answers = {} + interpretation = "User chose not to proceed with the assessment." 
+ else: + # Show instructions + print("\n📝 Instructions:") + print("You will now be presented with a few questions related to your mental health condition.") + if questionnaire_name in ["PHQ-9", "GAD-7"]: + print("Please answer each question honestly, based on how you've felt over the **last 2 weeks**.") + print("Use the following scale to respond:") + print(" 0 - Not at all") + print(" 1 - Several days") + print(" 2 - More than half the days") + print(" 3 - Nearly every day") + elif questionnaire_name == "DAST-10": + print("Answer each question with Yes or No based on your past year's experience.") + elif questionnaire_name == "AUDIT": + print("Answer using options like: Never, 1-2, 3-4, Weekly, Yes, No, etc., as applicable.") + elif questionnaire_name == "Bipolar": + print("Answer each question with Yes or No based on your past mood and energy patterns.") + + input("\nPress Enter to begin the questionnaire...") + + # Proceed with assessment + assessment = conduct_assessment(questionnaire_name) + answers = assessment["answers"] + score = assessment["score"] + interpretation = assessment["interpretation"] + + print("💡 Generating recommendations...") + final_rec = run_recommendations( + user_query, + json.dumps(dummy_profile), + condition, + json.dumps(answers), + interpretation, + is_crisis="false" + ) + + # task_outputs = final_rec.tasks_output + # retrieved_docs = task_outputs[0] + + return { + "recommendation": final_rec["recommendation"], + "score_interpretation": interpretation, + "condition": condition, + "is_crisis": is_crisis, + "crisis_explanation": explanation, + # "retrieved_docs": retrieved_docs + } + + +if __name__ == "__main__": + query = input("👤 Enter your mental health query: ") + final_output = full_chat_flow(query) + print("Final Output:\n", final_output) + 
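The CONDITION_TO_QUESTIONNAIRE lookup used above lives in `crew_ai/config.py` (next file). A small sketch of the intended mapping behaviour, including the fallback that makes the assessment optional for unmapped conditions:

    from crew_ai.config import get_config

    mapping = get_config()["CONDITION_TO_QUESTIONNAIRE"]
    print(mapping.get("anxiety"))     # "GAD-7"
    print(mapping.get("depression"))  # "PHQ-9"
    print(mapping.get("general"))     # None, so the assessment step is skipped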
+ """ + return { + # API Keys + "google_api_key": os.getenv("GOOGLE_API_KEY", ""), + + # LLM settings + "llm_model": os.getenv("LLM_MODEL", "groq/llama-3.3-70b-versatile"), + "llm_temperature": float(os.getenv("LLM_TEMPERATURE", "0.0")), + "llm_max_tokens": int(os.getenv("LLM_MAX_TOKENS", "1024")), + "llm_timeout": int(os.getenv("LLM_TIMEOUT", "30")), + "llm_max_retries": int(os.getenv("LLM_MAX_RETRIES", "2")), + + # Tool model settings + "crisis_model": os.getenv("CRISIS_MODEL", "lmsdmn/crisis-detection-model"), + + # Questionnaire path + "questionnaire_file": os.getenv("QUESTIONNAIRE_FILE", "questionnaire.json"), + + # Default profile for anonymous or test users + "default_user_profile": { + "id": "anon_user", + "location": "null", + "history": "null", + "preferences": "null" + }, + + "CONDITION_TO_QUESTIONNAIRE" : { + "anxiety": "GAD-7", + "depression": "PHQ-9", + "substance": "DAST-10", + "alcohol": "AUDIT", + "bipolar": "Bipolar", + "audit": "AUDIT", + "dast-10": "DAST-10", + "phq-9": "PHQ-9", + "gad-7": "GAD-7" + } + } diff --git a/crew_ai/data_retriever_util.py b/crew_ai/data_retriever_util.py new file mode 100644 index 0000000000000000000000000000000000000000..273efcb12718e1f0a3537b1d98971d48b3432dbe --- /dev/null +++ b/crew_ai/data_retriever_util.py @@ -0,0 +1,100 @@ +from sqlalchemy import create_engine, Column, String, Integer, Text +from sqlalchemy.dialects.postgresql import UUID +import uuid +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +#from datetime import datetime +from typing import Optional +import os +import uuid + +# Database configuration +DATABASE_URL = os.getenv("SUPABASE_DB_URI") +print(f"Connecting to database at {DATABASE_URL}") + + +# SQLAlchemy setup +Base = declarative_base() +engine = create_engine(DATABASE_URL) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + +# Define the UserProfile model +class UserProfile(Base): + __tablename__ = "user_profiles" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + name = Column(String(100), nullable=False) + age = Column(Integer) + gender = Column(String(20)) + city_region = Column(String(100)) + profession = Column(String(100)) + marital_status = Column(String(30)) + previous_mental_diagnosis = Column(Text, default='NA') + ethnicity = Column(String(50)) + #created_at = Column(DateTime(timezone=True), server_default=func.now()) + #updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) + + def __repr__(self): + return f"" + + def to_dict(self): + """Convert the model instance to a dictionary""" + return { + 'id': self.id, + 'name': self.name, + 'age': self.age, + 'gender': self.gender, + 'city_region': self.city_region, + 'profession': self.profession, + 'marital_status': self.marital_status, + 'previous_mental_diagnosis': self.previous_mental_diagnosis, + 'ethnicity': self.ethnicity, + #'created_at': self.created_at.isoformat() if self.created_at else None, + #'updated_at': self.updated_at.isoformat() if self.updated_at else None + } + +# Database operations class +class UserProfileRepository: + def __init__(self): + self.session = SessionLocal() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.session.close() + + def get_user_by_id(self, user_id: str) -> Optional[UserProfile]: + """ + Fetch a single user profile by ID + + Args: + user_id (str): The user ID to search for + + Returns: + UserProfile or None: The user profile if found, 
None otherwise + """ + try: + user = self.session.query(UserProfile).filter(UserProfile.id == user_id).first() + return user + except Exception as e: + print(f"Error fetching user by ID {user_id}: {e}") + self.session.rollback() + return None + + +def get_user_profile(user_id: str) -> Optional[dict]: + """ + Retrieve a user profile by ID + + Args: + user_id (str): The user ID to search for + + Returns: + dict or None: User profile data as a dictionary, or None if not found + """ + with UserProfileRepository() as repo: + user = repo.get_user_by_id(user_id) + return user.to_dict() if user else None + + diff --git a/crew_ai/fastapi_integration.py b/crew_ai/fastapi_integration.py new file mode 100644 index 0000000000000000000000000000000000000000..45c662da5f4cedb6e125fee1148f42482039b409 --- /dev/null +++ b/crew_ai/fastapi_integration.py @@ -0,0 +1,102 @@ +""" +FastAPI-compatible crew_ai processing function +""" +import json +from typing import Dict, Any +from crew_ai.chatbot import run_user_profile_retrieval, run_recommendations, run_crisis_check, run_condition_classification +from crew_ai.config import get_config + +def process_user_input(context: Dict[str, Any]) -> Dict[str, Any]: + """ + Process user input using crew_ai agents for FastAPI integration + + Args: + context: Dictionary containing user message and context + + Returns: + Dictionary with response, confidence, and additional data + """ + try: + user_message = context.get("user_message", "") + user_name = context.get("user_name", "Guest") + session_id = context.get("session_id", "default") + + # Build contextual query from available information + contextual_parts = [] + if context.get("mental_health_status") and context.get("mental_health_status") != "Unknown": + contextual_parts.append(f"Mental health status: {context['mental_health_status']}") + + if context.get("user_emotion") and context.get("user_emotion") != "neutral": + contextual_parts.append(f"Current emotion: {context['user_emotion']}") + + if context.get("detailed_scores"): + scores_summary = [] + for scale, details in context["detailed_scores"].items(): + scores_summary.append(f"{scale}: {details.get('interpretation', 'N/A')}") + contextual_parts.append(f"Assessment results: {', '.join(scores_summary)}") + + if context.get("recommendations"): + contextual_parts.append(f"Previous recommendations: {', '.join(context['recommendations'][:2])}") + + contextual_query = f"User says: '{user_message}'. 
" + " ".join(contextual_parts) + + # Check for crisis first + crisis_result = run_crisis_check(contextual_query) + is_crisis = crisis_result.get("is_crisis", False) + + if is_crisis: + # Handle crisis situation + crisis_rec = run_recommendations( + contextual_query, + json.dumps({"name": user_name, "session_id": session_id}), + condition="Crisis", + answers="{}", + interpretation="N/A", + is_crisis="true" + ) + + return { + "message": crisis_rec.get("recommendation", "If you're in immediate danger, please contact emergency services: 112 in Bhutan."), + "confidence": 0.95, + "is_crisis": True, + "condition": "crisis", + "recommendations": ["Seek immediate professional help", "Contact emergency services if in danger"], + "resources": ["Emergency: 112", "Mental Health Helpline: 1717"] + } + + # Normal processing - get user profile + user_profile = {"name": user_name, "session_id": session_id} + + # Classify condition + condition_result = run_condition_classification(contextual_query, json.dumps(user_profile)) + condition = condition_result.get("condition", "general").split(" ")[0].lower() + + # Get recommendations + final_rec = run_recommendations( + contextual_query, + json.dumps(user_profile), + condition, + json.dumps(context.get("detailed_scores", {})), + context.get("mental_health_status", "Unknown"), + is_crisis="false" + ) + + return { + "message": final_rec.get("recommendation", "I'm here to support your mental health journey. How can I help you today?"), + "confidence": 0.8, + "is_crisis": False, + "condition": condition, + "recommendations": context.get("recommendations", []), + "resources": ["Mental Health Helpline: 1717", "Emergency: 112"] + } + + except Exception as e: + print(f"Error in crew_ai processing: {e}") + return { + "message": "I'm here to support you. While I process your request, please know that help is always available.", + "confidence": 0.5, + "is_crisis": False, + "condition": "general", + "recommendations": ["Practice self-care", "Consider speaking with a mental health professional"], + "resources": ["Mental Health Helpline: 1717", "Emergency: 112"] + } diff --git a/crew_ai/full_chat_flow.py b/crew_ai/full_chat_flow.py new file mode 100644 index 0000000000000000000000000000000000000000..1bb46af420f0b51f755ac5fd15749226074b1495 --- /dev/null +++ b/crew_ai/full_chat_flow.py @@ -0,0 +1,122 @@ +import streamlit as st +import json +import time +from dotenv import load_dotenv + +load_dotenv() + +from crew_ai.chatbot import run_user_profile_retrieval, run_recommendations, run_crisis_check, run_condition_classification +from crew_ai.config import get_config + +config = get_config() + +def full_chat_flow(contextual_query: str, user_query: str, user_id: str = "anon_user"): + + with st.chat_message("user"): + st.write(st.session_state.chat_history[-1]["content"]) + + with st.chat_message("assistant"): + st.write("📄 Processing your query...") + + # Retrieve user profile + if st.session_state.user_profile is None: + st.session_state.user_profile = run_user_profile_retrieval(user_query, user_id) + + crisis_result = run_crisis_check(contextual_query) + is_crisis = crisis_result.get("is_crisis", False) + explanation = crisis_result.get("explanation", "") + + # Store crisis detection in session state + st.session_state.crisis_detected = is_crisis + st.session_state.crisis_explanation = explanation + + if is_crisis: + with st.chat_message("assistant"): + st.error(f"🚨 Crisis detected: {explanation}") + st.info("We're prioritizing your safety. 
Connecting you with the best support now...") + rec = run_recommendations( + contextual_query, + json.dumps(st.session_state.user_profile), + condition="Crisis", + answers="{}", + interpretation="N/A", + is_crisis="true" + ) + + # task_outputs = rec.tasks_output + # retrieved_docs_crisis = task_outputs[0] + + with st.chat_message("assistant"): + st.write("🆘 Crisis Support Recommendation:") + def stream_answer(): + for word in rec["recommendation"].split(" "): + yield word + " " + time.sleep(0.1) + st.write_stream(stream_answer) + + return rec + + # return { + # "recommendation": rec["recommendation"], + # "condition": "crisis", + # "is_crisis": True, + # "crisis_explanation": explanation, + # "score_interpretation": "Not applicable", + # # "retrieved_docs": retrieved_docs_crisis + # } + + with st.chat_message("assistant"): + st.write("✅ No immediate crisis detected.") + st.write("🔍 Let's understand your mental health condition...") + + condition_result = run_condition_classification(contextual_query, json.dumps(st.session_state.user_profile)) + condition = condition_result.get("condition", "general").split(" ")[0].lower() + condition_explanation = condition_result.get("rationale", "") + questionnaire_name = config["CONDITION_TO_QUESTIONNAIRE"].get(condition) + + # Store condition detection in session state + st.session_state.detected_condition = condition + st.session_state.condition_explanation = condition_explanation + + with st.chat_message("assistant"): + st.success(f"🧠 Based on your message, we identified: **{condition.upper()}**") + if not questionnaire_name: + st.info("No questionnaire available for this condition.") + interpretation = "Not applicable" + else: + st.info(f"We recommend a brief **{questionnaire_name}** assessment to understand your situation better.") + st.session_state.assessment_needed = True + st.session_state.temp_user_query = user_query + st.session_state.temp_condition = condition + st.session_state.questionnaire_name = questionnaire_name + return None + + final_rec = run_recommendations( + contextual_query, + json.dumps(st.session_state.user_profile), + condition, + json.dumps({}), + "N/A", + is_crisis="false" + ) + + # task_outputs = final_rec.tasks_output + # retrieved_docs = task_outputs[0] + + with st.chat_message("assistant"): + st.write("💡 Here's your personalized mental health recommendation:") + def stream_answer(): + for word in final_rec["recommendation"].split(" "): + yield word + " " + time.sleep(0.1) + st.write_stream(stream_answer) + + return final_rec + # return { + # "recommendation": final_rec["recommendation"], + # "score_interpretation": interpretation, + # "condition": condition, + # "is_crisis": False, + # "crisis_explanation": explanation, + # # "retrieved_docs": retrieved_docs + # } diff --git a/crew_ai/llm_setup.py b/crew_ai/llm_setup.py new file mode 100644 index 0000000000000000000000000000000000000000..e0d7bcbecf07abc9216fc0328e01eaca35b7f601 --- /dev/null +++ b/crew_ai/llm_setup.py @@ -0,0 +1,20 @@ +import os +from dotenv import load_dotenv +from crewai import LLM + +load_dotenv() + +def get_llm(): + """Initializes and returns the Gemini LLM with fallback handling.""" + try: + return LLM( + model="gemini/gemini-2.0-flash", + api_key=os.getenv("GOOGLE_API_KEY"), + temperature=0, + max_tokens=None, + timeout=None, + max_retries=2, + ) + except Exception as e: + print(f"❌ Error initializing LLM: {e}") + return None diff --git a/crew_ai/questionnaire.json b/crew_ai/questionnaire.json new file mode 100644 index 
0000000000000000000000000000000000000000..4a30fa1b8c10a36a790b37fa46c12636a01704a5 --- /dev/null +++ b/crew_ai/questionnaire.json @@ -0,0 +1,64 @@ +{ + "PHQ-9": [ + "Over the last 2 weeks, how often have you been bothered by any of the following problems? (Not at all, Several days, More than half the days, Nearly every day)", + "1. Little interest or pleasure in doing things?", + "2. Feeling down, depressed, or hopeless?", + "3. Trouble falling or staying asleep, or sleeping too much?", + "4. Feeling tired or having little energy?", + "5. Poor appetite or overeating?", + "6. Feeling bad about yourself - or that you are a failure or have let yourself or your family down?", + "7. Trouble concentrating on things, such as reading the newspaper or watching television?", + "8. Moving or speaking so slowly that other people could have noticed? Or the opposite - being so fidgety or restless that you have been moving around a lot more than usual?", + "9. Thoughts that you would be better off dead or of hurting yourself in some way?" + ], + "GAD-7": [ + "Over the last 2 weeks, how often have you been bothered by the following problems? (Not at all, Several days, More than half the days, Nearly every day)", + "1. Feeling nervous, anxious, or on edge?", + "2. Not being able to stop or control worrying?", + "3. Worrying too much about different things?", + "4. Trouble relaxing?", + "5. Being so restless that it's hard to sit still?", + "6. Becoming easily annoyed or irritable?", + "7. Feeling afraid as if something awful might happen?" + ], + "DAST-10": [ + "The following questions concern information about your involvement with drugs (excluding alcohol and tobacco) during the past 12 months. Please read each question carefully and decide which answer is appropriate for you. (Yes/No)", + "1. Have you used drugs other than those required for medical reasons?", + "2. Have you abused more than one drug at a time?", + "3. Are you able to stop using drugs when you want to?", + "4. Have you had blackouts or flashbacks from drug use?", + "5. Do you ever feel bad or guilty about your drug use?", + "6. Does your spouse (or parents) ever complain about your involvement with drugs?", + "7. Have you neglected your family because of your use of drugs?", + "8. Have you engaged in illegal activities in order to obtain drugs?", + "9. Have you ever experienced withdrawal symptoms (felt sick) when you stopped taking drugs?", + "10. Have you had medical problems as a result of your drug use (e.g., memory loss, hepatitis, convulsions, bleeding, etc.)?" + ], + "AUDIT": [ + "Please answer the following questions based on your alcohol use over the past 12 months.", + "1. How often do you have a drink containing alcohol? (0) Never [Skip to Q9-10], (1) Monthly or less, (2) 2 to 4 times a month, (3) 2 to 3 times a week, (4) 4 or more times a week", + "2. How many drinks containing alcohol do you have on a typical day when you are drinking? (0) 1 or 2, (1) 3 or 4, (2) 5 or 6, (3) 7, 8, or 9, (4) 10 or more", + "3. How often do you have six or more drinks on one occasion? (0) Never, (1) Less than monthly, (2) Monthly, (3) Weekly, (4) Daily or almost daily", + "4. How often during the last year have you found that you were not able to stop drinking once you had started? (0–4 scale as above)", + "5. How often during the last year have you failed to do what was normally expected from you because of drinking? (0–4 scale as above)", + "6. 
How often during the last year have you needed a first drink in the morning to get yourself going after a heavy drinking session? (0–4 scale as above)", + "7. How often during the last year have you had a feeling of guilt or remorse after drinking? (0–4 scale as above)", + "8. How often during the last year have you been unable to remember what happened the night before because you had been drinking? (0–4 scale as above)", + "9. Have you or someone else been injured as a result of your drinking? (0) No, (2) Yes, but not in the last year, (4) Yes, during the last year", + "10. Has a relative or friend or a doctor or another health worker been concerned about your drinking or suggested you cut down? (0) No, (2) Yes, but not in the last year, (4) Yes, during the last year" + ], + "Bipolar": [ + "Have there ever been times when you were not your usual self and...", + "1. You felt so good or hyper that other people thought you were not your normal self or were so hyper that you got into trouble?", + "2. You were so irritable that you shouted at people or started fights or arguments?", + "3. You felt much more self-confident than usual?", + "4. You got much less sleep than usual and found you didn't really miss it?", + "5. You were much more talkative or spoke faster than usual?", + "6. You had racing thoughts?", + "7. You were so easily distracted that you had trouble concentrating?", + "8. You were more active or did more things than usual?", + "9. You were much more social or outgoing than usual?", + "10. You did things that were unusual for you or that other people might have thought were excessive or risky?", + "11. Spending money got you or your family into trouble?" + ] +} diff --git a/crew_ai/questionnaire.py b/crew_ai/questionnaire.py new file mode 100644 index 0000000000000000000000000000000000000000..7b81156e53ef0dff501af2e5baf77b802de7bef7 --- /dev/null +++ b/crew_ai/questionnaire.py @@ -0,0 +1,444 @@ +# modules/questionnaire.py +import json +from typing import Dict, Any + +# Path to your questionnaire file +QUESTIONNAIRES_FILE = "crew_ai/questionnaire.json" + +def load_questionnaires() -> Dict[str, Any]: + """Load questionnaires from a file or fallback to defaults.""" + try: + with open(QUESTIONNAIRES_FILE, "r", encoding="utf-8") as f: + return json.load(f) + except (FileNotFoundError, json.JSONDecodeError): + print(f"⚠️ Could not load {QUESTIONNAIRES_FILE}. Using default questions.") + return create_default_questionnaires() + +def create_default_questionnaires() -> Dict[str, Any]: + """Default fallback questions.""" + return { + "PHQ-9": [ + "Over the last 2 weeks, how often have you been bothered by any of the following problems? (0-3)", + "Little interest or pleasure in doing things", + "Feeling down, depressed, or hopeless", + "Trouble falling asleep or sleeping too much", + "Feeling tired or having little energy", + "Poor appetite or overeating", + "Feeling bad about yourself", + "Trouble concentrating", + "Moving/speaking slowly or being fidgety", + "Thoughts of self-harm or death" + ], + "GAD-7": [ + "Over the last 2 weeks, how often have you been bothered by the following problems? (0-3)", + "Feeling nervous or on edge", + "Not being able to stop worrying", + "Worrying too much", + "Trouble relaxing", + "Restlessness", + "Irritability", + "Feeling something awful might happen" + ], + "DAST-10": [ + "The following questions are about drug use in the past year. 
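Review note on the questionnaire data: each questionnaire list stores its instruction prompt at index 0 and the numbered items after it, so consumers should skip the first element when iterating items (conduct_assessment below does exactly that). A minimal loading sketch:

```python
import json

# Load the questionnaire bank; index 0 of each list is the shared prompt.
with open("crew_ai/questionnaire.json", encoding="utf-8") as f:
    questionnaires = json.load(f)

instructions, *items = questionnaires["PHQ-9"]
print(instructions)  # shared response-scale prompt
print(len(items))    # 9 numbered items
```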
Answer Yes or No.", + "Used drugs not prescribed?", + "Abused more than one drug at once?", + "Tried and failed to stop using?", + "Experienced blackouts or flashbacks?", + "Felt guilty about drug use?", + "Had family complain about your use?", + "Neglected responsibilities?", + "Committed illegal acts for drugs?", + "Had withdrawal symptoms?", + "Had medical problems due to use?" + ], + "AUDIT": [ + "Please answer the following questions based on your alcohol use over the past 12 months.", + "1. How often do you have a drink containing alcohol? (0) Never [Skip to Q9-10], (1) Monthly or less, (2) 2 to 4 times a month, (3) 2 to 3 times a week, (4) 4 or more times a week", + "2. How many drinks containing alcohol do you have on a typical day when you are drinking? (0) 1 or 2, (1) 3 or 4, (2) 5 or 6, (3) 7, 8, or 9, (4) 10 or more", + "3. How often do you have six or more drinks on one occasion? (0) Never, (1) Less than monthly, (2) Monthly, (3) Weekly, (4) Daily or almost daily", + "4. How often during the last year have you found that you were not able to stop drinking once you had started? (0–4 scale as above)", + "5. How often during the last year have you failed to do what was normally expected from you because of drinking? (0–4 scale as above)", + "6. How often during the last year have you needed a first drink in the morning to get yourself going after a heavy drinking session? (0–4 scale as above)", + "7. How often during the last year have you had a feeling of guilt or remorse after drinking? (0–4 scale as above)", + "8. How often during the last year have you been unable to remember what happened the night before because you had been drinking? (0–4 scale as above)", + "9. Have you or someone else been injured as a result of your drinking? (0) No, (2) Yes, but not in the last year, (4) Yes, during the last year", + "10. Has a relative or friend or a doctor or another health worker been concerned about your drinking or suggested you cut down? (0) No, (2) Yes, but not in the last year, (4) Yes, during the last year" + ], + "Bipolar": [ + "Have there ever been times when you were not your usual self and...", + "1. You felt so good or hyper that others thought you were not your normal self or that you got into trouble?", + "2. You were so irritable that you shouted at people or started arguments?", + "3. You felt much more self-confident than usual?", + "4. You got much less sleep than usual and didn’t really miss it?", + "5. You were much more talkative or spoke faster than usual?", + "6. You had racing thoughts?", + "7. You were easily distracted?", + "8. You were more active or did more things than usual?", + "9. You were much more social or outgoing than usual?", + "10. You did risky things that could have caused trouble?", + "11. Spending money got you or your family into trouble?" + ] + } + +def conduct_assessment(condition: str) -> Dict[str, Any]: + """Run questionnaire and return answers, score, and interpretation.""" + questions = load_questionnaires().get(condition, []) + if not questions: + return {"answers": {}, "score": "N/A", "interpretation": "No questions found."} + + print(f"\n📝 Starting {condition} assessment:\n") + answers = {} + for i, q in enumerate(questions[1:], 1): # skip instructions + user_input = input(f"Q{i}. 
{q} ").strip().lower() + answers[q] = user_input + + score = score_questionnaire(condition, answers) + interpretation = interpret_score(condition, score) + + return { + "answers": answers, + "score": score, + "interpretation": interpretation + } + +def score_questionnaire(condition: str, answers: Dict[str, str]) -> int: + """Score PHQ-9, GAD-7, DAST-10 , Bipolar and AUDIT answers.""" + score = 0 + if condition in ["PHQ-9", "GAD-7"]: + scale = { + "0": 0, "not at all": 0, + "1": 1, "several days": 1, + "2": 2, "more than half the days": 2, + "3": 3, "nearly every day": 3 + } + for ans in answers.values(): + cleaned = ans.strip().lower() + if '-' in cleaned: + cleaned = cleaned.split("-", 1)[-1].strip() + score += scale.get(cleaned, 0) + + elif condition == "DAST-10": + for ans in answers.values(): + score += 1 if ans.lower() in ["yes", "y", "true", "1"] else 0 + + elif condition == "AUDIT": + score = 0 + question_keys = [f"Q{i}" for i in range(1,11)] + skip_to_end = False + + scale_0_to_4 = { + "never": 0, + "monthly or less": 1, + "less than monthly": 1, + "2 to 4 times a month": 2, + "5 or 6": 2, + "monthly": 2, + "2 to 3 times a week": 3, + "7, 8, or 9": 3, + "weekly": 3, + "4 or more times a week": 4, + "10 or more": 4, + "daily or almost daily": 4, + "1 or 2": 0, + "3 or 4": 1 + } + + scale_0_2_4 = { + "no": 0, + "yes, but not in the last year": 2, + "yes, during the last year": 4 + } + + # === Q1 logic (skip if "never") === + ans = answers.get("Q1", "").strip().lower() + print("Answer to Q1:", ans) + # ans1_clean = ans1_raw.replace("(", "").replace(")", "").replace(",", "").strip() + if ans == "never": + skip_to_end = True + score += 0 + else: + for key in scale_0_to_4: + if key in ans: + score += scale_0_to_4[key] + break + + if skip_to_end: + for qkey in ["Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q8"]: + answers[qkey] = "Skipped" + # Score Q9 and Q10 only + for qkey in ["Q9", "Q10"]: # Q9, Q10 + ans = answers.get(qkey, "").strip().lower() + # ans = ans.replace("(", "").replace(")", "").replace(",", "").strip() + for key in scale_0_2_4: + if key in ans: + score += scale_0_2_4[key] + break + return score + + # Continue with Q2–Q8 + for qkey in question_keys[1:8]: # Q2 to Q8 + ans = answers.get(qkey, "").strip().lower() + # ans = ans.replace("(", "").replace(")", "").replace(",", "").strip() + for key in scale_0_to_4: + if key in ans: + score += scale_0_to_4[key] + break + + # Score Q9, Q10 + for qkey in ["Q9","Q10"]: + ans = answers.get(qkey, "").strip().lower() + # ans = ans.replace("(", "").replace(")", "").replace(",", "").strip() + for key in scale_0_2_4: + if key in ans: + score += scale_0_2_4[key] + break + + return score + + elif condition == "Bipolar": + for ans in answers.values(): + score += 1 if ans.strip().lower() in ["yes", "y", "true", "1"] else 0 + + return score + +def interpret_score(condition: str, score: int) -> str: + """Interpret the score based on condition.""" + if condition == "PHQ-9": + if score <= 4: return "Minimal depression" + elif score <= 9: return "Mild depression" + elif score <= 14: return "Moderate depression" + elif score <= 19: return "Moderately severe depression" + return "Severe depression" + + if condition == "GAD-7": + if score <= 4: return "Minimal anxiety" + elif score <= 9: return "Mild anxiety" + elif score <= 14: return "Moderate anxiety" + return "Severe anxiety" + + if condition == "DAST-10": + if score == 0: return "No problems reported" + elif score <= 2: return "Low level of problems" + elif score <= 5: return "Moderate problems" + elif 
score <= 8: return "Substantial problems" + return "Severe problems" + + if condition == "AUDIT": + if score <= 7: return "Lower risk, usually no action needed." + elif score >= 8 and score <= 14: return "Hazardous or harmful alcohol use. Brief advice or counseling may be appropriate." + elif score >= 15 and score <= 19: return "Harmful alcohol use. Brief counseling and continued monitoring recommended." + elif score >= 20: return "Likely alcohol dependence. Referral for specialist assessment and treatment is recommended." + else: + return "Score out of typical AUDIT range." + + if condition == "Bipolar": + if score >= 7: return "Likely signs of bipolar disorder" + return "Unlikely bipolar symptoms" + + + return "Score interpreted" + +def calculate_phq9_score(responses): + """Calculate PHQ-9 depression score from responses""" + if len(responses) != 9: + return {"score": 0, "severity": "Invalid", "risk": "Low"} + + score = sum(responses) + + if score <= 4: + severity = "Minimal depression" + risk = "Low" + elif score <= 9: + severity = "Mild depression" + risk = "Low" + elif score <= 14: + severity = "Moderate depression" + risk = "Moderate" + elif score <= 19: + severity = "Moderately severe depression" + risk = "High" + else: + severity = "Severe depression" + risk = "High" + + # Check for suicidal ideation (question 9) + if responses[8] > 0: + risk = "High" + severity += " (with suicidal ideation)" + + return { + "score": score, + "severity": severity, + "risk": risk, + "max_score": 27 + } + +def calculate_gad7_score(responses): + """Calculate GAD-7 anxiety score from responses""" + if len(responses) != 7: + return {"score": 0, "severity": "Invalid", "risk": "Low"} + + score = sum(responses) + + if score <= 4: + severity = "Minimal anxiety" + risk = "Low" + elif score <= 9: + severity = "Mild anxiety" + risk = "Low" + elif score <= 14: + severity = "Moderate anxiety" + risk = "Moderate" + else: + severity = "Severe anxiety" + risk = "High" + + return { + "score": score, + "severity": severity, + "risk": risk, + "max_score": 21 + } + +def calculate_dast10_score(responses): + """Calculate DAST-10 substance use score from responses""" + if len(responses) != 10: + return {"score": 0, "severity": "Invalid", "risk": "Low"} + + score = sum(responses) + + if score == 0: + severity = "No problems reported" + risk = "Low" + elif score <= 2: + severity = "Low level of problems" + risk = "Low" + elif score <= 5: + severity = "Moderate problems" + risk = "Moderate" + elif score <= 8: + severity = "Substantial problems" + risk = "High" + else: + severity = "Severe problems" + risk = "High" + + return { + "score": score, + "severity": severity, + "risk": risk, + "max_score": 10 + } + +def calculate_audit_score(responses): + """Calculate AUDIT alcohol use score from responses""" + if len(responses) != 10: + return {"score": 0, "severity": "Invalid", "risk": "Low"} + + score = sum(responses) + + if score <= 7: + severity = "Lower risk" + risk = "Low" + recommendation = "No action needed" + elif score <= 14: + severity = "Hazardous or harmful alcohol use" + risk = "Moderate" + recommendation = "Brief advice or counseling may be appropriate" + elif score <= 19: + severity = "Harmful alcohol use" + risk = "High" + recommendation = "Brief counseling and continued monitoring recommended" + else: + severity = "Likely alcohol dependence" + risk = "High" + recommendation = "Referral for specialist assessment and treatment recommended" + + return { + "score": score, + "severity": severity, + "risk": risk, + 
"recommendation": recommendation, + "max_score": 40 + } + +def calculate_bipolar_score(responses): + """Calculate Bipolar screening score from responses""" + if len(responses) != 11: + return {"score": 0, "severity": "Invalid", "risk": "Low"} + + score = sum(responses) + + if score >= 7: + severity = "Likely signs of bipolar disorder" + risk = "High" + recommendation = "Further assessment recommended" + else: + severity = "Unlikely bipolar symptoms" + risk = "Low" + recommendation = "No immediate concerns" + + return { + "score": score, + "severity": severity, + "risk": risk, + "recommendation": recommendation, + "max_score": 11 + } + +def get_assessment_recommendations(scores): + """Generate overall assessment recommendations based on all scores""" + high_risk_areas = [] + moderate_risk_areas = [] + recommendations = [] + + # Analyze each domain + for domain, result in scores.items(): + if isinstance(result, dict) and 'risk' in result: + if result['risk'] == 'High': + high_risk_areas.append(domain.upper()) + elif result['risk'] == 'Moderate': + moderate_risk_areas.append(domain.upper()) + + # Overall status + if high_risk_areas: + overall_status = "High Risk - Professional Support Recommended" + recommendations.append("We strongly recommend consulting with a mental health professional") + recommendations.append("Consider scheduling an appointment with your doctor or a counselor") + elif moderate_risk_areas: + overall_status = "Moderate Risk - Consider Professional Guidance" + recommendations.append("Consider speaking with a counselor or mental health professional") + recommendations.append("Monitor your symptoms and seek help if they worsen") + else: + overall_status = "Low Risk - Continue Self-Care" + recommendations.append("Continue practicing good mental health habits") + recommendations.append("Stay connected with supportive friends and family") + + # Specific recommendations + if 'phq9' in scores and scores['phq9'].get('risk') == 'High': + recommendations.append("For depression: Consider therapy, medication evaluation, or support groups") + + if 'gad7' in scores and scores['gad7'].get('risk') == 'High': + recommendations.append("For anxiety: Practice relaxation techniques, consider counseling") + + if 'dast10' in scores and scores['dast10'].get('risk') in ['High', 'Moderate']: + recommendations.append("For substance use: Consider addiction counseling or support programs") + + if 'audit' in scores and scores['audit'].get('risk') in ['High', 'Moderate']: + recommendations.append("For alcohol use: Consider reducing consumption or seeking guidance") + + if 'bipolar' in scores and scores['bipolar'].get('risk') == 'High': + recommendations.append("For mood symptoms: Psychiatric evaluation recommended") + + # Emergency recommendations + if 'phq9' in scores and len(scores['phq9'].get('severity', '').split('suicidal')) > 1: + recommendations.insert(0, "🚨 IMMEDIATE: If you're having thoughts of self-harm, please contact emergency services (112/110) or the National Mental Health Helpline (1717)") + + return { + "overall_status": overall_status, + "high_risk_areas": high_risk_areas, + "moderate_risk_areas": moderate_risk_areas, + "recommendations": recommendations, + "summary": f"Assessment completed. 
Areas of concern: {', '.join(high_risk_areas + moderate_risk_areas) if high_risk_areas or moderate_risk_areas else 'None identified'}" + } diff --git a/crew_ai/tools.py b/crew_ai/tools.py new file mode 100644 index 0000000000000000000000000000000000000000..442fd7da576c4d50757def8191746e87b50e0c1f --- /dev/null +++ b/crew_ai/tools.py @@ -0,0 +1,205 @@ +import os +from crewai.tools import BaseTool +from crewai.tools import tool +from transformers import pipeline +from backend.crew_ai.data_retriever_util import get_user_profile +from backend.crew_ai.config import get_config +import psycopg2 +from psycopg2.extras import RealDictCursor +from typing import ClassVar +from langchain_huggingface import HuggingFaceEmbeddings +from gradio_client import Client + +class MentalHealthTools: + """Tools for mental health chatbot""" + @tool("Bhutanese Helplines") + def get_bhutanese_helplines() -> str: + """Retrieves Bhutanese mental health helplines from the PostgreSQL `resources` table.""" + try: + db_uri = os.getenv("SUPABASE_DB_URI") + if not db_uri: + raise ValueError("SUPABASE_DB_URI not set in environment") + + conn = psycopg2.connect(db_uri) + cursor = conn.cursor(cursor_factory=RealDictCursor) + + query = """ + SELECT name, description, phone, website, address, operation_hours + FROM resources + """ + cursor.execute(query) + helplines = cursor.fetchall() + + if not helplines: + return "No helplines found in the database." + + response = "📞 Bhutanese Mental Health Helplines:\n" + for h in helplines: + response += f"\n📌 {h['name']}" + if h['description']: + response += f"\n Description: {h['description']}" + if h['phone']: + response += f"\n 📱 Phone: {h['phone']}" + if h['website']: + response += f"\n 🌐 Website: {h['website']}" + if h['address']: + response += f"\n 🏠 Address: {h['address']}" + if h['operation_hours']: + response += f"\n ⏰ Hours: {h['operation_hours']}" + response += "\n" + + cursor.close() + conn.close() + return response.strip() + + except Exception as e: + return f"⚠️ Failed to fetch helplines from DB: {str(e)}" + + +class CrisisClassifierTool(BaseTool): + name: str = "Crisis Classifier" + description: str = ( + "Classifies a message for suicide/crisis risk using a Hugging Face " + "suicidality model. Input should be the text to classify." + ) + + def _run(self, text: str) -> str: + """ + Classifies the given text using the Hugging Face model. + Returns the classification label and score. + """ + try: + # Initialize the pipeline here (will happen on every tool call) + classifier = pipeline("sentiment-analysis", model="sentinet/suicidality") + result = classifier(text) + if result: + label = result[0]['label'] + score = result[0]['score'] + return f"Classification: {label} (Score: {score:.4f})" + return "Could not classify the text." + except Exception as e: + return f"Error during text classification: {e}" + +class MentalConditionClassifierTool(BaseTool): + name: str = "Mental Condition Classifier" + description: str = ( + "Classifies a message into a mental health condition category. " + "Input should be the text to classify." + ) + + # Class-level cache for the client + _client = None + + def _get_client(self): + if self._client is None: + self.__class__._client = Client("ety89/mental_health_text_classifiaction") # note: the Space ID's spelling is intentional + return self._client + + def _run(self, text: str) -> str: + """ + Classifies the given text using the Hugging Face model. + Returns the classification label and score. 
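Review note: get_assessment_recommendations above expects the per-domain dicts produced by the calculate_* helpers, keyed by instrument name. A quick sketch of the aggregation (values illustrative):

```python
scores = {
    "phq9": calculate_phq9_score([1, 1, 1, 0, 0, 1, 0, 0, 0]),  # total 4 -> Low risk
    "gad7": calculate_gad7_score([2, 2, 2, 2, 1, 1, 1]),        # total 11 -> Moderate risk
}
summary = get_assessment_recommendations(scores)
print(summary["overall_status"])       # Moderate Risk - Consider Professional Guidance
print(summary["moderate_risk_areas"])  # ['GAD7']
```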
+ """ + try: + # Initialize the pipeline here (will happen on every tool call) + + client = Client("ety89/mental_health_text_classifiaction") + result = client.predict( + input_text=text, + api_name="/predict" + ) + if result: + label = result.split(':')[-2].split('(')[-2].strip() + score = result.split(':')[-1].strip(')').strip() + return label, score + + return "Could not classify the text." + + except Exception as e: + return f"Error during text classification: {e}" + +class DataRetrievalTool(BaseTool): + name: str = "Data Retrieval" + description: str = ( + "A tool that fetched the user profile data from the database. " + "Input should be User Profile ID." + ) + + + def _run(self, user_profile_id: str) -> str: + """ + Fetches the user profile data from the database using the user profile ID. + Returns the user profile information or an error message. + """ + try: + + config = get_config() + + if user_profile_id.strip() == "anon_user": + return config['default_user_profile'] + + # Retrieve user profile using the utility function + user_profile = get_user_profile(user_profile_id) + if user_profile: + return f"User Profile: {user_profile}" + return "User profile not found." + except Exception as e: + return f"Error retrieving user profile: {e}" + +class QueryVectorStoreTool(BaseTool): + name: str = "Query Vector Store" + description: str = ( + "Queries the Supabase-hosted PostgreSQL vector database with a user query and classified condition, " + "and retrieves the top 3 most relevant documents." + ) + + # Shared across all instances + embedding_model: ClassVar = HuggingFaceEmbeddings( + model_name="sentence-transformers/all-MiniLM-L6-v2" + ) + + def _run(self, user_query: str, classified_condition: str) -> dict: + query_text = f"{user_query} Condition: {classified_condition}" + embedding = self.embedding_model.embed_query(query_text) + + db_uri = os.getenv("SUPABASE_DB_URI") + if not db_uri: + raise ValueError("SUPABASE_DB_URI not set in environment") + + conn = psycopg2.connect(db_uri) + cursor = conn.cursor() + + cursor.execute(""" + SELECT ac.chunk_text, a.title, a.topic, a.source, ac.embedding <-> %s::vector AS score + FROM article_chunks ac + JOIN articles a ON ac.doc_id = a.id + ORDER BY score + LIMIT 3; + """, (embedding,)) + + + rows = cursor.fetchall() + docs = [ + { + "text": row[0], + "title": row[1], + "topic": row[2], + "source": row[3], + "score": row[4] + } + for row in rows + ] + + cursor.close() + conn.close() + + return {"docs": docs} + + def _arun(self, *args, **kwargs): + raise NotImplementedError("Async version not implemented") + + \ No newline at end of file diff --git a/knowledge/deathless.pdf b/knowledge/deathless.pdf new file mode 100644 index 0000000000000000000000000000000000000000..72af2f97a7f052f738bee7ab4b905a87be0510e3 --- /dev/null +++ b/knowledge/deathless.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2093f3a6ead1873e17641bf56be71b13829326b2ef86ae16f3d6c3431b0173a2 +size 881162 diff --git a/knowledge/know-see.pdf b/knowledge/know-see.pdf new file mode 100644 index 0000000000000000000000000000000000000000..5128be21a550b4a206cec4aa4b77078a569be884 --- /dev/null +++ b/knowledge/know-see.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67f032cd0a7bb85615fdf7822142277a9d67312f8c7b6a7b0a4612eb5261238 +size 2976949 diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/models/__pycache__/__init__.cpython-311.pyc b/models/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..82155b3482232729f70e8248e837e09a3387a1b5 Binary files /dev/null and b/models/__pycache__/__init__.cpython-311.pyc differ diff --git a/models/__pycache__/chat_session.cpython-311.pyc b/models/__pycache__/chat_session.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b51cf4eb6fdc21df6a88da0b46b652cd6c78a69e Binary files /dev/null and b/models/__pycache__/chat_session.cpython-311.pyc differ diff --git a/models/__pycache__/user.cpython-311.pyc b/models/__pycache__/user.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..56e86d6330795a29408516764db32b473dea450d Binary files /dev/null and b/models/__pycache__/user.cpython-311.pyc differ diff --git a/models/chat_session.py b/models/chat_session.py new file mode 100644 index 0000000000000000000000000000000000000000..6d01f66003984c1aa5f71aeb31632cb21f0c8f82 --- /dev/null +++ b/models/chat_session.py @@ -0,0 +1,84 @@ +from datetime import datetime +from pathlib import Path +import json +import uuid + +class ChatSession: + def __init__(self, user_name, session_id=None): + self.session_id = session_id or str(uuid.uuid4()) + self.user_name = user_name + self.start_time = datetime.now() + self.messages = [] + self.session_data = { + "emotion": None, + "mental_health_status": None, + "topics_discussed": set(), + "agents_used": set(), + "sentiment_scores": [] + } + + def add_message(self, role, content, agent=None, metadata=None): + message = { + "timestamp": datetime.now().isoformat(), + "role": role, + "content": content, + "agent": agent, + "metadata": metadata or {} + } + self.messages.append(message) + + if agent and hasattr(self.session_data["agents_used"], 'add'): + self.session_data["agents_used"].add(agent) + + def add_topic(self, topic): + """Safely add a topic""" + if hasattr(self.session_data["topics_discussed"], 'add'): + self.session_data["topics_discussed"].add(topic) + else: + # If it's a list, convert to set first + topics = set(self.session_data.get("topics_discussed", [])) + topics.add(topic) + self.session_data["topics_discussed"] = topics + + def to_dict(self): + return { + "session_id": self.session_id, + "user_name": self.user_name, + "start_time": self.start_time.isoformat(), + "end_time": datetime.now().isoformat(), + "duration_minutes": int((datetime.now() - self.start_time).total_seconds() // 60), + "messages": self.messages, + "total_messages": len(self.messages), + "session_data": { + **self.session_data, + "topics_discussed": list(self.session_data["topics_discussed"]) if isinstance(self.session_data["topics_discussed"], set) else self.session_data["topics_discussed"], + "agents_used": list(self.session_data["agents_used"]) if isinstance(self.session_data["agents_used"], set) else self.session_data["agents_used"] + } + } + + @classmethod + def from_dict(cls, data, user_name=None): + """Create a ChatSession from dictionary data""" + session = cls(user_name or data.get('user_name', 'Guest'), data.get('session_id')) + session.messages = data.get('messages', []) + + # Restore the original start time so duration_minutes stays accurate + if data.get('start_time'): + session.start_time = datetime.fromisoformat(data['start_time']) + + # Reconstruct session_data with proper types + stored_data = data.get('session_data', {}) + session.session_data = { + "emotion": stored_data.get('emotion'), + "mental_health_status": stored_data.get('mental_health_status'), + "topics_discussed": set(stored_data.get('topics_discussed', [])), + "agents_used": set(stored_data.get('agents_used', [])), + 
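A short usage sketch of ChatSession as defined above; the user name and message are illustrative:

```python
session = ChatSession("pema")
session.add_message("user", "I feel anxious before exams", agent="RAG_AGENT")
session.add_topic("anxiety")

data = session.to_dict()
print(data["total_messages"])               # 1
print(data["session_data"]["agents_used"])  # ['RAG_AGENT']
```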
"sentiment_scores": stored_data.get('sentiment_scores', []) + } + + return session + + def save(self, directory="chat_sessions"): + Path(directory).mkdir(exist_ok=True) + filename = f"{directory}/chat_{self.user_name}_{self.start_time.strftime('%Y%m%d_%H%M%S')}_{self.session_id[:8]}.json" + + with open(filename, 'w') as f: + json.dump(self.to_dict(), f, indent=2) + + return filename \ No newline at end of file diff --git a/models/user.py b/models/user.py new file mode 100644 index 0000000000000000000000000000000000000000..caeb2a385b44e368026d3cebfaf078b727cc618a --- /dev/null +++ b/models/user.py @@ -0,0 +1,43 @@ +from sqlalchemy import create_engine, Column, Integer, String, DateTime, Boolean, Float, Text +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +from werkzeug.security import generate_password_hash, check_password_hash +from datetime import datetime +import secrets + +Base = declarative_base() + +class User(Base): + __tablename__ = 'users' + + id = Column(Integer, primary_key=True) + username = Column(String(80), unique=True, nullable=False) + email = Column(String(120), unique=True, nullable=False) + password_hash = Column(String(255), nullable=False) + full_name = Column(String(100)) + created_at = Column(DateTime, default=datetime.utcnow) + last_login = Column(DateTime) + is_active = Column(Boolean, default=True) + is_verified = Column(Boolean, default=False) + + # Profile completion status + has_completed_initial_survey = Column(Boolean, default=False) + initial_survey_date = Column(DateTime) + + # Session management + session_token = Column(String(255)) + + def set_password(self, password): + self.password_hash = generate_password_hash(password) + + def check_password(self, password): + return check_password_hash(self.password_hash, password) + + def generate_session_token(self): + self.session_token = secrets.token_urlsafe(32) + return self.session_token + +# Create database +engine = create_engine('sqlite:///mental_health_app.db') +Base.metadata.create_all(engine) +Session = sessionmaker(bind=engine) \ No newline at end of file