# Law-chatbot / config.py
# Uploaded by ak0601 ("Upload 15 files", commit 73ab00e, verified); file size 2.79 kB.
"""
Configuration file for the Law RAG Chatbot application
"""
import os
from typing import Optional
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()
# Load environment variables from .env file if it exists
def load_dotenv():
    """Load environment variables from a ``.env`` file in the working directory.

    This is a minimal stdlib-only parser. Note that this definition reuses
    the name imported from ``dotenv`` at the top of the file, so every call
    made after this point runs this function rather than the library one.

    Parsing rules, applied line by line:

    * blank lines, lines starting with ``#`` and lines without ``=`` are
      skipped;
    * the line is split on the first ``=`` only, so values may contain ``=``;
    * whitespace around the key and around the value is removed;
    * an optional leading ``export `` on the key is dropped;
    * one pair of matching single or double quotes around the value is
      removed;
    * lines whose key is empty are skipped.

    Values from the file overwrite variables that already exist in
    ``os.environ``. If ``.env`` does not exist the function does nothing.

    Returns:
        None
    """
    env_file = Path('.env')
    if not env_file.exists():
        return

    export_prefix = 'export '
    with open(env_file, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue

            key, value = line.split('=', 1)
            key = key.strip()
            if key.startswith(export_prefix):
                # Tolerate shell-style "export KEY=value" lines.
                key = key[len(export_prefix):].strip()
            if not key:
                # An empty name cannot be stored in the environment.
                continue

            value = value.strip()
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                # Drop exactly one pair of matching surrounding quotes.
                value = value[1:-1]

            os.environ[key] = value
# Load .env file
# At this point `load_dotenv` is the function defined above in this file; it
# shadows the name imported from `dotenv`. It must run before the os.getenv()
# calls below so that values from .env are visible to them.
load_dotenv()

# Hugging Face Configuration
# HF_TOKEN is None when the environment variable is not set.
HF_TOKEN = os.getenv('HF_TOKEN')
# NOTE(review): the dataset, collection name and API title below refer to
# mental health counseling, while the module docstring and MAX_LEGAL_CONCEPTS
# refer to a law chatbot — confirm which domain this configuration targets.
HF_DATASET_NAME = "Amod/mental_health_counseling_conversations"

# Groq Configuration
# GROQ_API_KEY is None when the environment variable is not set.
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
GROQ_MODEL = "llama3-8b-8192"  # or "mixtral-8x7b-32768"

# Embedding Configuration
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Presumably the output vector size of EMBEDDING_MODEL; keep the two in sync
# if the model is changed.
EMBEDDING_DIMENSION = 384

# ChromaDB Configuration
CHROMA_PERSIST_DIR = "./chroma_db"  # relative path, resolved against the working directory
CHROMA_COLLECTION_NAME = "mental_health_counseling"

# FastAPI Configuration
API_TITLE = "Mental Health Counseling Chatbot API"
API_VERSION = "1.0.0"
API_DESCRIPTION = "RAG-based mental health counseling chatbot using Amod/mental_health_counseling_conversations data"
HOST = "0.0.0.0"  # wildcard address, i.e. all network interfaces
PORT = 8000

# RAG Configuration
# NOTE(review): the unit of CHUNK_SIZE / CHUNK_OVERLAP (characters vs. tokens)
# is not visible in this file — confirm against the code that splits documents.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
TOP_K_RETRIEVAL = 8  # Increased from 5
MAX_TOKENS = 4096
TEMPERATURE = 0.1
# NOTE(review): ChatRequest.context_length further down defaults to 3, not 5 —
# confirm which default is intended.
DEFAULT_CONTEXT_LENGTH = 5  # New default context length

# Token Management Configuration
MAX_CONTEXT_TOKENS = 4000  # Maximum tokens for context (reserve space for prompt)
MAX_PROMPT_TOKENS = 6000  # Maximum total prompt tokens (Groq limit)
MAX_SOURCES = 5  # Maximum number of sources to include
MAX_SEARCH_VARIATIONS = 2  # Maximum search variations to try
MAX_LEGAL_CONCEPTS = 2  # Maximum legal concepts to extract

# Dataset Configuration
DATASET_SPLIT = "train"
CACHE_DIR = ".cache"  # relative path, resolved against the working directory
# Error Messages
# Maps a short error key to a human-readable message. Entries ending in "{}"
# are templates with one positional placeholder, presumably filled in by
# callers via str.format() with the underlying error detail (the callers are
# not visible in this file). The first two entries are complete messages.
ERROR_MESSAGES = {
    "no_hf_token": "Hugging Face token not found. Set HF_TOKEN environment variable.",
    "no_groq_key": "Groq API key not found. Set GROQ_API_KEY environment variable.",
    "auth_failed": "Authentication failed: {}",
    "dataset_load_failed": "Failed to load dataset: {}",
    "embedding_failed": "Failed to create embeddings: {}",
    "vector_db_failed": "Failed to setup vector database: {}",
    "llm_failed": "Failed to initialize LLM: {}"
}
# API Response Models
class ChatRequest:
    """Describe the fields of an incoming chat request.

    This is a plain class that only declares annotations: it has no base
    class and no decorator, so no ``__init__`` accepting these fields is
    generated here. ``question`` exists purely as an annotation, while
    ``context_length`` is also a class attribute with the value ``3``.
    """

    # Text of the question being asked.
    question: str

    # Optional context length for the request; the class-level default is 3.
    # NOTE(review): how this value is consumed is not visible in this file.
    context_length: Optional[int] = 3
class ChatResponse:
    """Describe the fields of a chat response.

    This is a plain class that only declares annotations: it has no base
    class and no decorator, so none of the fields below has a default value
    and no ``__init__`` accepting them is generated here.
    """

    # Answer text returned to the caller.
    answer: str

    # Sources associated with the answer; the element type is not specified.
    sources: list

    # Confidence value for the answer.
    # NOTE(review): the scale/range is not defined in this file.
    confidence: float

    # Processing time for the request.
    # NOTE(review): the unit is not defined in this file.
    processing_time: float