Upload 2 files
- rag_system.py +55 -138
- requirements.txt +2 -1
rag_system.py
CHANGED
@@ -1,6 +1,4 @@
-"""
-RAG System for Law Chatbot using Langchain, Groq, and ChromaDB
-"""
+
 
 import os
 import logging
@@ -301,10 +299,12 @@ class RAGSystem:
         search_results = self._filter_relevant_results(search_results, question)
 
         if not search_results:
+            # No relevant docs found: generate a short, supportive answer using LLM with empty context
+            response = await self._generate_llm_response(question, context="")
             return {
-                "answer":
+                "answer": response,
                 "sources": [],
-                "confidence": 0.
+                "confidence": 0.5  # Lower confidence since no docs
             }
 
         # Prepare context for LLM
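Reviewer note: the new fallback keeps the response pipeline uniform; when retrieval returns nothing, the same LLM path is reused with an empty context and a lower confidence score. A minimal sketch of that shape (the function name and the injected callable are hypothetical, not the repo's API):

from typing import Any, Dict

async def answer_without_sources(question: str, generate_llm_response) -> Dict[str, Any]:
    """Fallback used when retrieval finds no relevant documents.

    `generate_llm_response` is assumed to be an async callable mirroring
    RAGSystem._generate_llm_response(question, context).
    """
    # Empty context: the prompt's "If context is insufficient" guideline takes over.
    answer = await generate_llm_response(question, context="")
    return {
        "answer": answer,
        "sources": [],       # nothing to cite
        "confidence": 0.5,   # lower confidence since no supporting docs
    }

Callers can still tell a grounded answer from this fallback by checking for an empty "sources" list.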
@@ -412,40 +412,47 @@ class RAGSystem:
     async def _generate_llm_response(self, question: str, context: str) -> str:
         """Generate response using Groq LLM with token management"""
         try:
-            #
-
-
+            # Detect language of the question
+            import re
+            from langdetect import detect, LangDetectException
+            try:
+                user_language = detect(question)
+            except LangDetectException:
+                user_language = "en"
+            # Map language code to readable name (for prompt)
+            lang_map = {"en": "English", "hi": "Hindi"}
+            language_name = lang_map.get(user_language, "the user's language")
+
+            # Updated prompt template
+            prompt_template = f"""
+You are a compassionate mental health supporter with training in anxiety, depression, trauma, and coping strategies.
 Use the following evidence-based psychological information to address the user’s concerns with care and accuracy.
 
 Therapeutic Context:
-{context}
+{{context}}
 
-User’s Concern: {question}
+User’s Concern: {{question}}
 
 Guidelines for Response:
-
-
-
-
-
-Cite sources when referencing specific therapies or studies (e.g., "APA guidelines suggest...").
-
-
-
-
-
-When discussing crises, emphasize jurisdictional resources (e.g., "Laws/programs vary by location, but here’s how to find local help...").
-
-Prioritize validation and education—not just information.
+- Reply in the same language as the user's question. If the question is in Hindi, answer in Hindi. If in another language, answer in that language.
+- Strictly limit your answer to 2 sentences. Do not elaborate or add extra information. Do not repeat yourself.
+- Keep your answer conversational and natural, as if chatting with a friend.
+- Provide empathetic, evidence-based support rooted in the context (e.g., CBT, DBT, or mindfulness principles).
+- If context is insufficient, acknowledge limits and offer general wellness strategies (e.g., grounding techniques, self-care tips).
+- Cite sources when referencing specific therapies or studies (e.g., "APA guidelines suggest...").
+- For symptom-related questions, differentiate between mild, moderate, and severe cases (e.g., situational stress vs. clinical anxiety).
+- Use clear, stigma-free language while maintaining clinical accuracy.
+- When discussing crises, emphasize jurisdictional resources (e.g., "Laws/programs vary by location, but here’s how to find local help...").
+- Prioritize validation and education—not just information.
+- Always reply in {language_name}.
 
 Example Response:
-"I hear you’re feeling overwhelmed. Based on [Context Source], deep breathing exercises can help calm acute anxiety. However, if these feelings persist for weeks, it might reflect generalized anxiety disorder (GAD). Always consult a licensed therapist for personalized care. Would you like crisis hotline numbers or a step-by-step grounding technique?
-
-
+"I hear you’re feeling overwhelmed. Based on [Context Source], deep breathing exercises can help calm acute anxiety. However, if these feelings persist for weeks, it might reflect generalized anxiety disorder (GAD). Always consult a licensed therapist for personalized care. Would you like crisis hotline numbers or a step-by-step grounding technique?"
+"""
             # Estimate total tokens
             estimated_prompt_tokens = self._count_tokens(prompt_template.format(context=context, question=question))
             logger.info(f"Estimated prompt tokens: {estimated_prompt_tokens}")
-
+
             # If still too large, truncate context further
             if estimated_prompt_tokens > MAX_PROMPT_TOKENS:  # Use config value
                 logger.warning(f"Prompt too large ({estimated_prompt_tokens} tokens), truncating context further")
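Reviewer note: two details in this hunk are easy to trip over. First, langdetect raises LangDetectException on empty or non-linguistic input, so the try/except fallback to "en" matters. Second, because the template is now an f-string, {context} and {question} must be doubled to {{...}} so they survive as placeholders for ChatPromptTemplate instead of being interpolated immediately. A standalone sketch of both, assuming langdetect and langchain-core are installed (the langchain_core.prompts import path is an assumption about the installed LangChain layout, not taken from this repo):

from langdetect import DetectorFactory, detect, LangDetectException
from langchain_core.prompts import ChatPromptTemplate

DetectorFactory.seed = 0  # langdetect is probabilistic; seeding makes results repeatable

def detect_language(question: str, default: str = "en") -> str:
    """Return an ISO 639-1 code, falling back to `default` on short/ambiguous input."""
    try:
        return detect(question)
    except LangDetectException:
        return default

lang_map = {"en": "English", "hi": "Hindi"}
language_name = lang_map.get(detect_language("मैं बहुत चिंतित हूँ"), "the user's language")

# Doubled braces keep {context}/{question} as template variables after f-string formatting.
template = f"""Always reply in {language_name}.

Therapeutic Context:
{{context}}

User's Concern: {{question}}
"""

prompt = ChatPromptTemplate.from_template(template)
print(prompt.input_variables)  # expected: ['context', 'question']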
@@ -453,20 +460,23 @@ Example Response:
                 context = self._truncate_context(context, max_context_tokens)
                 estimated_prompt_tokens = self._count_tokens(prompt_template.format(context=context, question=question))
                 logger.info(f"After truncation: {estimated_prompt_tokens} tokens")
-
+
             # Create enhanced prompt template for legal questions
             prompt = ChatPromptTemplate.from_template(prompt_template)
-
+
             # Create chain
             chain = prompt | self.llm | StrOutputParser()
-
+
             # Generate response
             response = await chain.ainvoke({
                 "question": question,
                 "context": context
             })
-
-
+
+            # Post-process: Truncate to first 2 sentences
+            sentences = re.split(r'(?<=[.!?])\s+', response.strip())
+            short_response = ' '.join(sentences[:2]).strip()
+            return short_response
 
         except Exception as e:
             logger.error(f"Error generating LLM response: {e}")
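Reviewer note: the post-processing step enforces the "2 sentences" guideline mechanically rather than trusting the model. The split pattern is a lookbehind on sentence-ending punctuation, so abbreviations such as "e.g." followed by a space also count as sentence ends. A small self-contained sketch of the same logic:

import re

def first_n_sentences(text: str, n: int = 2) -> str:
    """Keep at most `n` sentences, splitting after '.', '!' or '?' followed by whitespace."""
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    return ' '.join(sentences[:n]).strip()

reply = ("I hear that you're feeling overwhelmed. Slow breathing can help in the moment. "
         "If this persists for weeks, consider talking to a licensed therapist.")
print(first_n_sentences(reply))
# -> "I hear that you're feeling overwhelmed. Slow breathing can help in the moment."

Since StrOutputParser yields a plain string, the chain output can be passed to a helper like this directly.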
@@ -868,132 +878,39 @@ If you have a specific legal question, please try rephrasing it or contact a loc
         )
 
     def _is_conversational_query(self, question: str) -> bool:
-        """Detect if the query is
+        """Detect if the query is a pure greeting or system check (not a real mental health question)"""
         question_lower = question.lower().strip()
-
         # Common greetings and casual conversation
         greetings = [
             "hi", "hello", "hey", "good morning", "good afternoon", "good evening",
             "how are you", "how's it going", "what's up", "sup", "yo"
         ]
-
         # Very short or casual queries
         if len(question_lower) <= 3 or question_lower in greetings:
             return True
-
-        # Questions that don't need legal context
+        # System check/capability questions
         casual_questions = [
             "how can you help", "what can you do", "what are you", "who are you",
             "are you working", "are you there", "can you hear me", "test"
         ]
-
         for casual in casual_questions:
-            if casual
+            if casual == question_lower:
                 return True
-
-        # If it's not clearly legal, treat as conversational
-        if not self._is_legal_query(question):
-            return True
-
+        # Otherwise, treat as a real question (let LLM handle it)
        return False
 
     def _generate_conversational_response(self, question: str) -> str:
-        """Generate
+        """Generate a short, friendly response for greetings or system checks only"""
         question_lower = question.lower().strip()
-
-        if question_lower in
-            return "
-
-• Anxiety and stress management
-• Depression and mood challenges
-• Trauma healing and PTSD recovery
-• Relationship and family dynamics
-• Workplace stress and burnout prevention
-• Self-esteem and personal growth journeys
-• Grief processing and life transitions
-• And many other emotional wellness concerns
-
-This is a safe space where you can:
-
-Share what's on your mind without judgment
-
-Explore healthy coping strategies
-
-Understand your emotional experiences
-
-Find resources for professional support
-
-How would you like to begin today?
-You could tell me how you're feeling, ask about coping techniques, or explore resources for specific challenges."""
-
+        greetings = ["hi", "hello", "hey"]
+        if question_lower in greetings:
+            return "Hello! How can I support your mental health or well-being today?"
         elif "how can you help" in question_lower or "what can you do" in question_lower:
-            return "
-
-• Anxiety and stress management
-• Depression and mood disorders
-• Trauma recovery and PTSD
-• Relationship and family challenges
-• Workplace burnout and career stress
-• Grief and loss processing
-• Self-esteem and personal growth
-• Coping skills and resilience building
-• And many other emotional wellness concerns
-
-I offer a safe space to explore your feelings, develop coping strategies, and find resources. Remember, while I'm here to support you, I'm not a replacement for professional care in crisis situations.
-
-How would you like to begin today?
-You could share what's on your mind, how you're feeling, or ask about:
-
-Coping techniques for [specific emotion]
-
-Understanding [mental health term]
-
-Local therapist resources
-
-Self-care strategies"""
-
+            return "I can offer brief, evidence-based tips and emotional support for mental health questions. What would you like to talk about?"
         elif "who are you" in question_lower or "what are you" in question_lower:
-            return "
-
-• Search through therapeutic resources and evidence-based practices
-• Explain mental health concepts and coping strategies
-• Provide information on conditions, symptoms, and treatments
-• Help you navigate therapy options and self-care techniques
-• Share reputable mental health sources and crisis resources
-
-I'm not a licensed therapist, and I can't diagnose or treat conditions, but I can offer general information, emotional support, and tools to help you better understand your well-being.
-
-What would you like to explore today?
-You might ask about:
-
-Understanding anxiety/depression symptoms
-
-Grounding techniques for stress
-
-How cognitive behavioral therapy (CBT) works
-
-Finding a therapist near you
-
-Managing [specific emotion or situation]"""
-
+            return "I'm an AI companion here to help with mental health and wellness questions. How can I assist you?"
         else:
-            return "
-
-How can I assist you today? You might ask about:**
-
-Relaxation techniques for anxiety
-
-Understanding depression symptoms
-
-How to find a therapist
-
-Coping with [specific stressor]
-
-Self-care for tough emotions
-
-(Note: I’m not a substitute for professional care, but I’m here to listen and guide.)
-
-What’s on your mind?"""
+            return "How can I help you today? Feel free to ask about mental health, coping, or emotional support."
 
     def _filter_relevant_results(self, search_results: List[Dict[str, Any]], question: str) -> List[Dict[str, Any]]:
         """Filter search results for relevance to the question"""
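Reviewer note: switching the capability check from a substring test to `casual == question_lower` changes behaviour in a useful way, since questions that merely contain a casual phrase now fall through to the RAG path instead of getting a canned reply. A small sketch of the new matching rule (standalone function and constants, not the class method):

GREETINGS = [
    "hi", "hello", "hey", "good morning", "good afternoon", "good evening",
    "how are you", "how's it going", "what's up", "sup", "yo",
]
CASUAL_QUESTIONS = [
    "how can you help", "what can you do", "what are you", "who are you",
    "are you working", "are you there", "can you hear me", "test",
]

def is_conversational(question: str) -> bool:
    """Exact-match greetings/system checks only; everything else goes to the RAG pipeline."""
    q = question.lower().strip()
    if len(q) <= 3 or q in GREETINGS:
        return True
    return any(q == casual for casual in CASUAL_QUESTIONS)

print(is_conversational("What can you do"))                        # True  -> canned reply
print(is_conversational("What can you do about panic attacks?"))   # False -> retrieval + LLM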
@@ -1050,4 +967,4 @@ What’s on your mind?"""
         relevant_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
 
         logger.info(f"Filtered {len(search_results)} results to {len(relevant_results)} relevant results")
-        return relevant_results
+        return relevant_results
requirements.txt
CHANGED
@@ -15,4 +15,5 @@ python-dotenv>=1.0.0
 numpy>=1.24.0
 pandas>=2.0.0
 requests>=2.31.0
-tiktoken>=0.5.0
+tiktoken>=0.5.0
+langdetect