ak0601 committed on
Commit
b235289
·
verified ·
1 Parent(s): 73ab00e

Upload 2 files

Browse files
Files changed (2) hide show
  1. rag_system.py +55 -138
  2. requirements.txt +2 -1
rag_system.py CHANGED
@@ -1,6 +1,4 @@
1
- """
2
- RAG System for Law Chatbot using Langchain, Groq, and ChromaDB
3
- """
4
 
5
  import os
6
  import logging
@@ -301,10 +299,12 @@ class RAGSystem:
301
  search_results = self._filter_relevant_results(search_results, question)
302
 
303
  if not search_results:
 
 
304
  return {
305
- "answer": "I couldn't help in this case, please consult a mental health professional.",
306
  "sources": [],
307
- "confidence": 0.0
308
  }
309
 
310
  # Prepare context for LLM
@@ -412,40 +412,47 @@ class RAGSystem:
412
  async def _generate_llm_response(self, question: str, context: str) -> str:
413
  """Generate response using Groq LLM with token management"""
414
  try:
415
- # Count tokens for the entire request
416
- prompt_template = """
417
- You are a compassionate mental health supporter with training in anxiety, depression, trauma, and coping strategies.
 
 
 
 
 
 
 
 
 
 
 
418
  Use the following evidence-based psychological information to address the user’s concerns with care and accuracy.
419
 
420
  Therapeutic Context:
421
- {context}
422
 
423
- User’s Concern: {question}
424
 
425
  Guidelines for Response:
426
-
427
- Provide empathetic, evidence-based support rooted in the context (e.g., CBT, DBT, or mindfulness principles).
428
-
429
- If context is insufficient, acknowledge limits and offer general wellness strategies (e.g., grounding techniques, self-care tips).
430
-
431
- Cite sources when referencing specific therapies or studies (e.g., "APA guidelines suggest...").
432
-
433
- For symptom-related questions, differentiate between mild, moderate, and severe cases (e.g., situational stress vs. clinical anxiety).
434
-
435
- Use clear, stigma-free language while maintaining clinical accuracy.
436
-
437
- When discussing crises, emphasize jurisdictional resources (e.g., "Laws/programs vary by location, but here’s how to find local help...").
438
-
439
- Prioritize validation and education—not just information.
440
 
441
  Example Response:
442
- "I hear you’re feeling overwhelmed. Based on [Context Source], deep breathing exercises can help calm acute anxiety. However, if these feelings persist for weeks, it might reflect generalized anxiety disorder (GAD). Always consult a licensed therapist for personalized care. Would you like crisis hotline numbers or a step-by-step grounding technique?
443
- """
444
-
445
  # Estimate total tokens
446
  estimated_prompt_tokens = self._count_tokens(prompt_template.format(context=context, question=question))
447
  logger.info(f"Estimated prompt tokens: {estimated_prompt_tokens}")
448
-
449
  # If still too large, truncate context further
450
  if estimated_prompt_tokens > MAX_PROMPT_TOKENS: # Use config value
451
  logger.warning(f"Prompt too large ({estimated_prompt_tokens} tokens), truncating context further")
@@ -453,20 +460,23 @@ Example Response:
453
  context = self._truncate_context(context, max_context_tokens)
454
  estimated_prompt_tokens = self._count_tokens(prompt_template.format(context=context, question=question))
455
  logger.info(f"After truncation: {estimated_prompt_tokens} tokens")
456
-
457
  # Create enhanced prompt template for legal questions
458
  prompt = ChatPromptTemplate.from_template(prompt_template)
459
-
460
  # Create chain
461
  chain = prompt | self.llm | StrOutputParser()
462
-
463
  # Generate response
464
  response = await chain.ainvoke({
465
  "question": question,
466
  "context": context
467
  })
468
-
469
- return response.strip()
 
 
 
470
 
471
  except Exception as e:
472
  logger.error(f"Error generating LLM response: {e}")
@@ -868,132 +878,39 @@ If you have a specific legal question, please try rephrasing it or contact a loc
868
  )
869
 
870
  def _is_conversational_query(self, question: str) -> bool:
871
- """Detect if the query is conversational and doesn't need legal document search"""
872
  question_lower = question.lower().strip()
873
-
874
  # Common greetings and casual conversation
875
  greetings = [
876
  "hi", "hello", "hey", "good morning", "good afternoon", "good evening",
877
  "how are you", "how's it going", "what's up", "sup", "yo"
878
  ]
879
-
880
  # Very short or casual queries
881
  if len(question_lower) <= 3 or question_lower in greetings:
882
  return True
883
-
884
- # Questions that don't need legal context
885
  casual_questions = [
886
  "how can you help", "what can you do", "what are you", "who are you",
887
  "are you working", "are you there", "can you hear me", "test"
888
  ]
889
-
890
  for casual in casual_questions:
891
- if casual in question_lower:
892
  return True
893
-
894
- # If it's not clearly legal, treat as conversational
895
- if not self._is_legal_query(question):
896
- return True
897
-
898
  return False
899
 
900
  def _generate_conversational_response(self, question: str) -> str:
901
- """Generate appropriate response for conversational queries"""
902
  question_lower = question.lower().strip()
903
-
904
- if question_lower in ["hi", "hello", "hey"]:
905
- return """Hello! I'm your compassionate mental health companion. I'm here to offer support and guidance for various emotional well-being topics including:
906
-
907
- • Anxiety and stress management
908
- • Depression and mood challenges
909
- • Trauma healing and PTSD recovery
910
- • Relationship and family dynamics
911
- • Workplace stress and burnout prevention
912
- • Self-esteem and personal growth journeys
913
- • Grief processing and life transitions
914
- • And many other emotional wellness concerns
915
-
916
- This is a safe space where you can:
917
-
918
- Share what's on your mind without judgment
919
-
920
- Explore healthy coping strategies
921
-
922
- Understand your emotional experiences
923
-
924
- Find resources for professional support
925
-
926
- How would you like to begin today?
927
- You could tell me how you're feeling, ask about coping techniques, or explore resources for specific challenges."""
928
-
929
  elif "how can you help" in question_lower or "what can you do" in question_lower:
930
- return """"Hello! I'm your compassionate mental health companion. I'm here to provide emotional support and guidance for various psychological well-being topics including:
931
-
932
- • Anxiety and stress management
933
- • Depression and mood disorders
934
- • Trauma recovery and PTSD
935
- • Relationship and family challenges
936
- • Workplace burnout and career stress
937
- • Grief and loss processing
938
- • Self-esteem and personal growth
939
- • Coping skills and resilience building
940
- • And many other emotional wellness concerns
941
-
942
- I offer a safe space to explore your feelings, develop coping strategies, and find resources. Remember, while I'm here to support you, I'm not a replacement for professional care in crisis situations.
943
-
944
- How would you like to begin today?
945
- You could share what's on your mind, how you're feeling, or ask about:
946
-
947
- Coping techniques for [specific emotion]
948
-
949
- Understanding [mental health term]
950
-
951
- Local therapist resources
952
-
953
- Self-care strategies"""
954
-
955
  elif "who are you" in question_lower or "what are you" in question_lower:
956
- return """I'm an AI-powered mental health companion here to offer emotional support and wellness guidance. I can:
957
-
958
- • Search through therapeutic resources and evidence-based practices
959
- • Explain mental health concepts and coping strategies
960
- • Provide information on conditions, symptoms, and treatments
961
- • Help you navigate therapy options and self-care techniques
962
- • Share reputable mental health sources and crisis resources
963
-
964
- I'm not a licensed therapist, and I can't diagnose or treat conditions, but I can offer general information, emotional support, and tools to help you better understand your well-being.
965
-
966
- What would you like to explore today?
967
- You might ask about:
968
-
969
- Understanding anxiety/depression symptoms
970
-
971
- Grounding techniques for stress
972
-
973
- How cognitive behavioral therapy (CBT) works
974
-
975
- Finding a therapist near you
976
-
977
- Managing [specific emotion or situation]"""
978
-
979
  else:
980
- return """Hello! I’m here to offer emotional support and mental health resources. I can help you explore coping strategies, explain therapeutic concepts, and provide evidence-based information to support your well-being.
981
-
982
- How can I assist you today? You might ask about:**
983
-
984
- Relaxation techniques for anxiety
985
-
986
- Understanding depression symptoms
987
-
988
- How to find a therapist
989
-
990
- Coping with [specific stressor]
991
-
992
- Self-care for tough emotions
993
-
994
- (Note: I’m not a substitute for professional care, but I’m here to listen and guide.)
995
-
996
- What’s on your mind?"""
997
 
998
  def _filter_relevant_results(self, search_results: List[Dict[str, Any]], question: str) -> List[Dict[str, Any]]:
999
  """Filter search results for relevance to the question"""
@@ -1050,4 +967,4 @@ What’s on your mind?"""
1050
  relevant_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
1051
 
1052
  logger.info(f"Filtered {len(search_results)} results to {len(relevant_results)} relevant results")
1053
- return relevant_results
 
1
+
 
 
2
 
3
  import os
4
  import logging
 
299
  search_results = self._filter_relevant_results(search_results, question)
300
 
301
  if not search_results:
302
+ # No relevant docs found: generate a short, supportive answer using LLM with empty context
303
+ response = await self._generate_llm_response(question, context="")
304
  return {
305
+ "answer": response,
306
  "sources": [],
307
+ "confidence": 0.5 # Lower confidence since no docs
308
  }
309
 
310
  # Prepare context for LLM
 
412
  async def _generate_llm_response(self, question: str, context: str) -> str:
413
  """Generate response using Groq LLM with token management"""
414
  try:
415
+ # Detect language of the question
416
+ import re
417
+ from langdetect import detect, LangDetectException
418
+ try:
419
+ user_language = detect(question)
420
+ except LangDetectException:
421
+ user_language = "en"
422
+ # Map language code to readable name (for prompt)
423
+ lang_map = {"en": "English", "hi": "Hindi"}
424
+ language_name = lang_map.get(user_language, "the user's language")
425
+
426
+ # Updated prompt template
427
+ prompt_template = f"""
428
+ You are a compassionate mental health supporter with training in anxiety, depression, trauma, and coping strategies.
429
  Use the following evidence-based psychological information to address the user’s concerns with care and accuracy.
430
 
431
  Therapeutic Context:
432
+ {{context}}
433
 
434
+ User’s Concern: {{question}}
435
 
436
  Guidelines for Response:
437
+ - Reply in the same language as the user's question. If the question is in Hindi, answer in Hindi. If in another language, answer in that language.
438
+ - Strictly limit your answer to 2 sentences. Do not elaborate or add extra information. Do not repeat yourself.
439
+ - Keep your answer conversational and natural, as if chatting with a friend.
440
+ - Provide empathetic, evidence-based support rooted in the context (e.g., CBT, DBT, or mindfulness principles).
441
+ - If context is insufficient, acknowledge limits and offer general wellness strategies (e.g., grounding techniques, self-care tips).
442
+ - Cite sources when referencing specific therapies or studies (e.g., "APA guidelines suggest...").
443
+ - For symptom-related questions, differentiate between mild, moderate, and severe cases (e.g., situational stress vs. clinical anxiety).
444
+ - Use clear, stigma-free language while maintaining clinical accuracy.
445
+ - When discussing crises, emphasize jurisdictional resources (e.g., "Laws/programs vary by location, but here’s how to find local help...").
446
+ - Prioritize validation and education—not just information.
447
+ - Always reply in {language_name}.
 
 
 
448
 
449
  Example Response:
450
+ "I hear you’re feeling overwhelmed. Based on [Context Source], deep breathing exercises can help calm acute anxiety. However, if these feelings persist for weeks, it might reflect generalized anxiety disorder (GAD). Always consult a licensed therapist for personalized care. Would you like crisis hotline numbers or a step-by-step grounding technique?"
451
+ """
 
452
  # Estimate total tokens
453
  estimated_prompt_tokens = self._count_tokens(prompt_template.format(context=context, question=question))
454
  logger.info(f"Estimated prompt tokens: {estimated_prompt_tokens}")
455
+
456
  # If still too large, truncate context further
457
  if estimated_prompt_tokens > MAX_PROMPT_TOKENS: # Use config value
458
  logger.warning(f"Prompt too large ({estimated_prompt_tokens} tokens), truncating context further")
 
460
  context = self._truncate_context(context, max_context_tokens)
461
  estimated_prompt_tokens = self._count_tokens(prompt_template.format(context=context, question=question))
462
  logger.info(f"After truncation: {estimated_prompt_tokens} tokens")
463
+
464
  # Create enhanced prompt template for legal questions
465
  prompt = ChatPromptTemplate.from_template(prompt_template)
466
+
467
  # Create chain
468
  chain = prompt | self.llm | StrOutputParser()
469
+
470
  # Generate response
471
  response = await chain.ainvoke({
472
  "question": question,
473
  "context": context
474
  })
475
+
476
+ # Post-process: Truncate to first 2 sentences
477
+ sentences = re.split(r'(?<=[.!?])\s+', response.strip())
478
+ short_response = ' '.join(sentences[:2]).strip()
479
+ return short_response
480
 
481
  except Exception as e:
482
  logger.error(f"Error generating LLM response: {e}")
 
878
  )
879
 
880
  def _is_conversational_query(self, question: str) -> bool:
881
+ """Detect if the query is a pure greeting or system check (not a real mental health question)"""
882
  question_lower = question.lower().strip()
 
883
  # Common greetings and casual conversation
884
  greetings = [
885
  "hi", "hello", "hey", "good morning", "good afternoon", "good evening",
886
  "how are you", "how's it going", "what's up", "sup", "yo"
887
  ]
 
888
  # Very short or casual queries
889
  if len(question_lower) <= 3 or question_lower in greetings:
890
  return True
891
+ # System check/capability questions
 
892
  casual_questions = [
893
  "how can you help", "what can you do", "what are you", "who are you",
894
  "are you working", "are you there", "can you hear me", "test"
895
  ]
 
896
  for casual in casual_questions:
897
+ if casual == question_lower:
898
  return True
899
+ # Otherwise, treat as a real question (let LLM handle it)
 
 
 
 
900
  return False
901
 
902
  def _generate_conversational_response(self, question: str) -> str:
903
+ """Generate a short, friendly response for greetings or system checks only"""
904
  question_lower = question.lower().strip()
905
+ greetings = ["hi", "hello", "hey"]
906
+ if question_lower in greetings:
907
+ return "Hello! How can I support your mental health or well-being today?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
908
  elif "how can you help" in question_lower or "what can you do" in question_lower:
909
+ return "I can offer brief, evidence-based tips and emotional support for mental health questions. What would you like to talk about?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
910
  elif "who are you" in question_lower or "what are you" in question_lower:
911
+ return "I'm an AI companion here to help with mental health and wellness questions. How can I assist you?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
912
  else:
913
+ return "How can I help you today? Feel free to ask about mental health, coping, or emotional support."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
914
 
915
  def _filter_relevant_results(self, search_results: List[Dict[str, Any]], question: str) -> List[Dict[str, Any]]:
916
  """Filter search results for relevance to the question"""
 
967
  relevant_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
968
 
969
  logger.info(f"Filtered {len(search_results)} results to {len(relevant_results)} relevant results")
970
+ return relevant_results
requirements.txt CHANGED
@@ -15,4 +15,5 @@ python-dotenv>=1.0.0
15
  numpy>=1.24.0
16
  pandas>=2.0.0
17
  requests>=2.31.0
18
- tiktoken>=0.5.0
 
 
15
  numpy>=1.24.0
16
  pandas>=2.0.0
17
  requests>=2.31.0
18
+ tiktoken>=0.5.0
19
+ langdetect