Spaces:

Dhruv-18
/

ai-lawyer

Sleeping

ai-lawyer / config.py

Dhruv Pawar

Initial commit of AI-Lawyer project

68aa9b8 5 months ago

2.33 kB

	"""
	Configuration module for LegalMind AI application.
	Centralizes all configuration settings and directory paths.
	"""
	import os
	from dotenv import load_dotenv

	# Load variables from a .env file into the process environment.
	# override=True makes values from .env replace variables that are already
	# set in the environment (python-dotenv keeps existing values by default).
	# NOTE: the settings in this module are read once at import time, so edits
	# made to .env after this call are not picked up until the module is reloaded.
	load_dotenv(override=True)

	# Base directories
	# BASE_DIR is the absolute path of the folder that contains this module;
	# every data path below is derived from it.
	BASE_DIR = os.path.dirname(os.path.abspath(__file__))
	DATA_DIR = os.path.join(BASE_DIR, "data")

	# Sub-directories of DATA_DIR (paths only; they are created by the loop below)
	PDFS_DIR = os.path.join(DATA_DIR, "pdfs")
	VECTORSTORE_DIR = os.path.join(DATA_DIR, "vectorstore")
	METADATA_DIR = os.path.join(DATA_DIR, "metadata")
	CACHE_DIR = os.path.join(DATA_DIR, "cache")
	LOGS_DIR = os.path.join(DATA_DIR, "logs")
	CONVERSATIONS_DIR = os.path.join(DATA_DIR, "conversations")

	# Create all directories if they don't exist. This runs at import time;
	# exist_ok=True makes it a no-op for directories that are already present.
	for directory in [
	    DATA_DIR,
	    PDFS_DIR,
	    VECTORSTORE_DIR,
	    METADATA_DIR,
	    CACHE_DIR,
	    LOGS_DIR,
	    CONVERSATIONS_DIR,
	]:
	    os.makedirs(directory, exist_ok=True)

	# Credentials
	# GROQ_API_KEY is None when the variable is not present in the environment.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

	# Default model identifiers; each can be overridden by an environment
	# variable of the same name.
	DEFAULT_LLM_MODEL = os.environ.get(
	    "DEFAULT_LLM_MODEL",
	    "deepseek-r1-distill-llama-70b",
	)
	DEFAULT_EMBEDDING_MODEL = os.environ.get(
	    "DEFAULT_EMBEDDING_MODEL",
	    "all-MiniLM-L6-v2",
	)

	# Embedding models offered by the application, keyed by model name.
	# Each entry repeats the name and carries a short human-readable description.
	EMBEDDING_MODELS = {
	    model_name: {"name": model_name, "description": blurb}
	    for model_name, blurb in (
	        ("all-MiniLM-L6-v2", "Fast, lightweight model good for general use"),
	        ("all-mpnet-base-v2", "More accurate but slightly slower model"),
	    )
	}

	# LLM models offered by the application, keyed by model name.
	# Each entry repeats the name and carries a short human-readable description.
	LLM_MODELS = {
	    model_name: {"name": model_name, "description": blurb}
	    for model_name, blurb in (
	        ("deepseek-r1-distill-llama-70b", "Balanced performance and speed"),
	        ("llama3-70b-8192", "High quality with longer context"),
	        ("mixtral-8x7b-32768", "Best for complex reasoning"),
	    )
	}

	# Supported vector database backends, keyed by backend identifier.
	# Each entry carries only a human-readable description.
	VECTOR_DB_TYPES = dict(
	    faiss={"description": "Fast, efficient vector database, good for most use cases"},
	    chroma={"description": "Persistent vector database with advanced filtering"},
	)

	# Performance settings
	def _env_int(name: str, default: int) -> int:
	    """Return environment variable *name* as an int, or *default* if unset.

	    A variable that is present but blank (e.g. ``CHUNK_SIZE=`` in a .env file)
	    is treated as unset instead of crashing the import.

	    Raises:
	        ValueError: if the variable holds a non-blank value that is not an
	            integer; the message names the offending variable.
	    """
	    raw = os.getenv(name)
	    if raw is None or not raw.strip():
	        return default
	    try:
	        return int(raw)
	    except ValueError:
	        raise ValueError(
	            f"Environment variable {name} must be an integer, got {raw!r}"
	        ) from None


	# Each value can be overridden by an environment variable of the same name.
	MAX_RETRIES = _env_int("MAX_RETRIES", 2)
	CHUNK_SIZE = _env_int("CHUNK_SIZE", 2000)
	CHUNK_OVERLAP = _env_int("CHUNK_OVERLAP", 50)