bharadwaj-m commited on
Commit
09aa2b8
·
0 Parent(s):

First Commit

Browse files
Files changed (15) hide show
  1. .gitattributes +35 -0
  2. .gitignore +93 -0
  3. README.md +13 -0
  4. api/dependencies.py +126 -0
  5. api/main.py +344 -0
  6. api/schemas.py +61 -0
  7. app.py +210 -0
  8. config/config.py +112 -0
  9. core/data_loader.py +185 -0
  10. core/rag_engine.py +151 -0
  11. core/user_profile.py +464 -0
  12. data/.gitkeep +1 -0
  13. docs/API.md +294 -0
  14. huggingface.yaml +113 -0
  15. requirements.txt +44 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ env/
8
+ build/
9
+ develop-eggs/
10
+ dist/
11
+ downloads/
12
+ eggs/
13
+ .eggs/
14
+ lib/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ *.egg-info/
21
+ .installed.cfg
22
+ *.egg
23
+ .pytest_cache/
24
+ .coverage
25
+ htmlcov/
26
+
27
+ # Virtual Environment
28
+ venv/
29
+ ENV/
30
+ .env/
31
+ .venv/
32
+
33
+ # IDE
34
+ .idea/
35
+ .vscode/
36
+ *.swp
37
+ *.swo
38
+ .DS_Store
39
+ *.sublime-workspace
40
+ *.sublime-project
41
+
42
+ # Project specific
43
+ data/vector_store/
44
+ data/travel_guides.json
45
+ data/user_profiles/
46
+ data/cache/
47
+ .cache/
48
+ *.log
49
+ logs/
50
+ .env
51
+ .env.*
52
+ !.env.example
53
+ secrets.json
54
+ secret_key.py
55
+
56
+ # Model files
57
+ models/
58
+ *.bin
59
+ *.pt
60
+ *.pth
61
+ *.onnx
62
+ *.h5
63
+ *.hdf5
64
+ *.ckpt
65
+ *.safetensors
66
+
67
+ # Hugging Face
68
+ .huggingface/
69
+ transformers/
70
+ datasets/
71
+ hub/
72
+
73
+ # Temporary files
74
+ tmp/
75
+ temp/
76
+ *.tmp
77
+ *.temp
78
+ *.bak
79
+ *.swp
80
+ *~
81
+
82
+ # System files
83
+ .DS_Store
84
+ Thumbs.db
85
+ desktop.ini
86
+
87
+ # Docker
88
+ .docker/
89
+ docker-compose.override.yml
90
+
91
+ # Documentation
92
+ docs/_build/
93
+ site/
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: TravelMate AI
3
+ emoji: 🌍
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 5.33.1
8
+ app_file: app.py
9
+ pinned: false
10
+ short_description: AI-Powered Customer Support Chatbot using RAG
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
api/dependencies.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Depends, HTTPException, status
2
+ from fastapi.security import OAuth2PasswordBearer
3
+ from jose import JWTError, jwt
4
+ from datetime import datetime, timedelta
5
+ from typing import Dict, Any, Optional
6
+ import time
7
+ from functools import wraps
8
+ import logging
9
+ from cachetools import TTLCache
10
+
11
+ # BaseSettings import removed – unused
12
+
13
+ from config.config import settings
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ # Rate limiting cache with adjusted size
18
+ rate_limit_cache = TTLCache(
19
+ maxsize=settings.MAX_CACHE_SIZE, ttl=settings.RATE_LIMIT_WINDOW
20
+ )
21
+
22
+ # JWT settings from environment
23
+ SECRET_KEY = settings.JWT_SECRET_KEY
24
+ ALGORITHM = "HS256"
25
+ ACCESS_TOKEN_EXPIRE_MINUTES = settings.ACCESS_TOKEN_EXPIRE_MINUTES
26
+
27
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
28
+
29
+
30
+ def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
31
+ """Create JWT access token with validation"""
32
+ if not isinstance(data, dict):
33
+ raise ValueError("Token data must be a dictionary")
34
+
35
+ to_encode = data.copy()
36
+ if expires_delta:
37
+ expire = datetime.utcnow() + expires_delta
38
+ else:
39
+ expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
40
+
41
+ to_encode.update({"exp": expire})
42
+ try:
43
+ encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
44
+ return encoded_jwt
45
+ except Exception as e:
46
+ logger.error(f"Error creating access token: {str(e)}", exc_info=True)
47
+ raise HTTPException(
48
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
49
+ detail="Error creating access token",
50
+ )
51
+
52
+
53
+ async def get_current_user(token: str = Depends(oauth2_scheme)) -> Dict[str, Any]:
54
+ """Get current user with enhanced validation"""
55
+ credentials_exception = HTTPException(
56
+ status_code=status.HTTP_401_UNAUTHORIZED,
57
+ detail="Could not validate credentials",
58
+ headers={"WWW-Authenticate": "Bearer"},
59
+ )
60
+
61
+ try:
62
+ payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
63
+ user_id: str = payload.get("sub")
64
+ if not user_id or not isinstance(user_id, str):
65
+ raise credentials_exception
66
+
67
+ # Validate token expiration
68
+ exp = payload.get("exp")
69
+ if not exp or datetime.fromtimestamp(exp) < datetime.utcnow():
70
+ raise HTTPException(
71
+ status_code=status.HTTP_401_UNAUTHORIZED,
72
+ detail="Token has expired",
73
+ headers={"WWW-Authenticate": "Bearer"},
74
+ )
75
+
76
+ return {"user_id": user_id}
77
+ except JWTError as e:
78
+ logger.error(f"JWT validation error: {str(e)}", exc_info=True)
79
+ raise credentials_exception
80
+
81
+
82
+ def rate_limit(func):
83
+ """Rate limit decorator with enhanced validation"""
84
+
85
+ @wraps(func)
86
+ async def wrapper(*args, **kwargs):
87
+ current_user = kwargs.get("current_user")
88
+ if not current_user or "user_id" not in current_user:
89
+ raise HTTPException(
90
+ status_code=status.HTTP_400_BAD_REQUEST,
91
+ detail="User ID is required for rate limiting",
92
+ )
93
+ user_id = current_user["user_id"]
94
+
95
+ # Check rate limit with enhanced validation
96
+ current_time = time.time()
97
+ key = f"{user_id}:{current_time // settings.RATE_LIMIT_WINDOW}"
98
+
99
+ try:
100
+ with rate_limit_cache._lock:
101
+ if key in rate_limit_cache:
102
+ count = rate_limit_cache[key]
103
+ if count >= settings.RATE_LIMIT_REQUESTS:
104
+ raise HTTPException(
105
+ status_code=status.HTTP_429_TOO_MANY_REQUESTS,
106
+ detail=f"Rate limit exceeded. Try again in {settings.RATE_LIMIT_WINDOW} seconds",
107
+ )
108
+ rate_limit_cache[key] = count + 1
109
+ else:
110
+ rate_limit_cache[key] = 1
111
+
112
+ return await func(*args, **kwargs)
113
+ except Exception as e:
114
+ logger.error(f"Rate limit error: {str(e)}", exc_info=True)
115
+ raise HTTPException(
116
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
117
+ detail="Error processing rate limit",
118
+ )
119
+
120
+ return wrapper
121
+
122
+
123
+ async def cleanup():
124
+ """Cleanup resources"""
125
+ # Add any necessary cleanup here, e.g., closing database connections
126
+ pass
api/main.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Depends, Request, status, BackgroundTasks
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import JSONResponse
4
+ from fastapi.security import OAuth2PasswordBearer
5
+ from fastapi.middleware.gzip import GZipMiddleware
6
+ from typing import Dict, Any, Optional, List
7
+ import time
8
+ import logging
9
+ from datetime import datetime
10
+ from pydantic import BaseModel, Field
11
+ import os
12
+ import asyncio
13
+ from tenacity import retry, stop_after_attempt, wait_exponential
14
+
15
+ from config.config import settings
16
+ from core.rag_engine import RAGEngine
17
+ from core.user_profile import UserProfile, UserPreferences
18
+
19
+ # Define missing types
20
+ class ChatRequest(BaseModel):
21
+ message: str
22
+ chat_history: Optional[List[Dict[str, str]]] = None
23
+
24
+ class ChatResponse(BaseModel):
25
+ answer: str
26
+ sources: Optional[List[str]] = None
27
+ suggested_questions: Optional[List[str]] = None
28
+
29
+ class ErrorResponse(BaseModel):
30
+ error: str
31
+ detail: Optional[str] = None
32
+ timestamp: str = Field(default_factory=lambda: datetime.utcnow().isoformat())
33
+ request_id: Optional[str] = None
34
+
35
+ class UserProfileResponse(BaseModel):
36
+ profile: Dict[str, Any]
37
+
38
+ class UserPreferencesUpdate(BaseModel):
39
+ preferences: Dict[str, Any]
40
+
41
+ # Setup logging with rotation
42
+ from logging.handlers import RotatingFileHandler
43
+
44
+ logging.basicConfig(
45
+ level=getattr(logging, settings.LOG_LEVEL),
46
+ format=settings.LOG_FORMAT,
47
+ handlers=[
48
+ logging.StreamHandler(),
49
+ RotatingFileHandler(
50
+ "api.log",
51
+ maxBytes=10 * 1024 * 1024, # 10MB
52
+ backupCount=5,
53
+ ),
54
+ ],
55
+ )
56
+ logger = logging.getLogger(__name__)
57
+
58
+ app = FastAPI(
59
+ title=settings.PROJECT_NAME,
60
+ description="AI-powered travel assistant API",
61
+ version=settings.VERSION,
62
+ docs_url="/docs", # Always show docs on HF Spaces
63
+ redoc_url="/redoc",
64
+ )
65
+
66
+ # Add security headers middleware
67
+ @app.middleware("http")
68
+ async def add_security_headers(request: Request, call_next):
69
+ response = await call_next(request)
70
+ response.headers["X-Content-Type-Options"] = "nosniff"
71
+ response.headers["X-Frame-Options"] = "DENY"
72
+ response.headers["X-XSS-Protection"] = "1; mode=block"
73
+ response.headers["Strict-Transport-Security"] = (
74
+ "max-age=31536000; includeSubDomains"
75
+ )
76
+ return response
77
+
78
+ # Add CORS middleware with validation
79
+ app.add_middleware(
80
+ CORSMiddleware,
81
+ allow_origins=["*"], # Allow all origins for Hugging Face Spaces
82
+ allow_credentials=True,
83
+ allow_methods=["GET", "POST", "PUT", "DELETE"],
84
+ allow_headers=["*"],
85
+ max_age=3600,
86
+ )
87
+
88
+ # Add Gzip compression
89
+ app.add_middleware(GZipMiddleware, minimum_size=1000)
90
+
91
+ # Initialize core components with retry
92
+ @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
93
+ async def initialize_components():
94
+ try:
95
+ global rag_engine, user_profile
96
+ rag_engine = RAGEngine()
97
+ user_profile = UserProfile()
98
+ logger.info("Core components initialized successfully")
99
+ except Exception as e:
100
+ logger.error(f"Failed to initialize core components: {str(e)}", exc_info=True)
101
+ raise
102
+
103
+ # Initialize components asynchronously
104
+ asyncio.create_task(initialize_components())
105
+
106
+ # OAuth2 scheme for token authentication
107
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
108
+
109
+ from api.dependencies import (
110
+ get_current_user,
111
+ rate_limit,
112
+ cleanup,
113
+ )
114
+
115
+ @app.exception_handler(Exception)
116
+ async def global_exception_handler(request: Request, exc: Exception):
117
+ """Global exception handler with request ID"""
118
+ request_id = request.headers.get("X-Request-ID", "unknown")
119
+ logger.error(
120
+ f"Unhandled exception: {str(exc)}",
121
+ exc_info=True,
122
+ extra={"request_id": request_id},
123
+ )
124
+ return JSONResponse(
125
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
126
+ content=ErrorResponse(
127
+ error="Internal Server Error", detail=str(exc), request_id=request_id
128
+ ).dict(),
129
+ )
130
+
131
+ @app.middleware("http")
132
+ async def add_process_time_header(request: Request, call_next):
133
+ """Add processing time header to response"""
134
+ start_time = time.time()
135
+ try:
136
+ response = await call_next(request)
137
+ process_time = time.time() - start_time
138
+ response.headers["X-Process-Time"] = str(process_time)
139
+ return response
140
+ except Exception as e:
141
+ logger.error(f"Error in middleware: {str(e)}", exc_info=True)
142
+ raise
143
+
144
+ @app.get("/")
145
+ async def root():
146
+ """Root endpoint with version info"""
147
+ return {
148
+ "message": "Welcome to TravelMate AI Assistant API",
149
+ "version": settings.VERSION,
150
+ "environment": settings.DEBUG, # Use DEBUG setting for environment
151
+ }
152
+
153
+ @app.post(
154
+ "/chat",
155
+ response_model=ChatResponse,
156
+ responses={
157
+ 400: {"model": ErrorResponse},
158
+ 401: {"model": ErrorResponse},
159
+ 429: {"model": ErrorResponse},
160
+ 500: {"model": ErrorResponse},
161
+ },
162
+ )
163
+ @rate_limit
164
+ async def chat(
165
+ request: ChatRequest,
166
+ background_tasks: BackgroundTasks,
167
+ current_user: Dict[str, Any] = Depends(get_current_user),
168
+ ):
169
+ """Process chat request with enhanced validation"""
170
+ try:
171
+ # Validate request size
172
+ if len(request.message) > settings.MAX_MESSAGE_LENGTH:
173
+ raise HTTPException(
174
+ status_code=status.HTTP_400_BAD_REQUEST,
175
+ detail=f"Message too long. Maximum length is {settings.MAX_MESSAGE_LENGTH} characters",
176
+ )
177
+
178
+ # Validate chat history
179
+ if request.chat_history:
180
+ if len(request.chat_history) > settings.MAX_CHAT_HISTORY:
181
+ raise HTTPException(
182
+ status_code=status.HTTP_400_BAD_REQUEST,
183
+ detail=f"Chat history too long. Maximum length is {settings.MAX_CHAT_HISTORY} messages",
184
+ )
185
+ for msg in request.chat_history:
186
+ if not isinstance(msg, dict) or not all(
187
+ k in msg for k in ["user", "assistant"]
188
+ ):
189
+ raise HTTPException(
190
+ status_code=status.HTTP_400_BAD_REQUEST,
191
+ detail="Invalid chat history format",
192
+ )
193
+
194
+ # Process query with RAG engine
195
+ result = await asyncio.wait_for(
196
+ rag_engine.process_query(
197
+ query=request.message,
198
+ chat_history=request.chat_history,
199
+ user_id=current_user["user_id"],
200
+ ),
201
+ timeout=settings.QUERY_TIMEOUT,
202
+ )
203
+
204
+ # Add cleanup task
205
+ background_tasks.add_task(cleanup)
206
+
207
+ return ChatResponse(
208
+ answer=result["answer"],
209
+ sources=result.get("metadata", {}).get("sources", []),
210
+ suggested_questions=result.get("suggested_questions", []),
211
+ )
212
+ except asyncio.TimeoutError:
213
+ raise HTTPException(
214
+ status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail="Request timed out"
215
+ )
216
+ except HTTPException:
217
+ raise
218
+ except Exception as e:
219
+ logger.error(f"Error processing chat request: {str(e)}", exc_info=True)
220
+ raise HTTPException(
221
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
222
+ detail="Error processing chat request",
223
+ )
224
+
225
+ @app.get(
226
+ "/profile",
227
+ response_model=UserProfileResponse,
228
+ responses={401: {"model": ErrorResponse}, 500: {"model": ErrorResponse}},
229
+ )
230
+ async def get_profile(current_user: Dict[str, Any] = Depends(get_current_user)):
231
+ """Get user profile with enhanced error handling"""
232
+ try:
233
+ profile = await asyncio.wait_for(
234
+ user_profile.get_profile(current_user["user_id"]),
235
+ timeout=settings.PROFILE_TIMEOUT,
236
+ )
237
+ if not profile:
238
+ raise HTTPException(
239
+ status_code=status.HTTP_404_NOT_FOUND, detail="Profile not found"
240
+ )
241
+ return UserProfileResponse(**profile)
242
+ except asyncio.TimeoutError:
243
+ raise HTTPException(
244
+ status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail="Request timed out"
245
+ )
246
+ except HTTPException:
247
+ raise
248
+ except Exception as e:
249
+ logger.error(f"Error getting user profile: {str(e)}", exc_info=True)
250
+ raise HTTPException(
251
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
252
+ detail="Error retrieving profile",
253
+ )
254
+
255
+ @app.put(
256
+ "/profile/preferences",
257
+ responses={
258
+ 400: {"model": ErrorResponse},
259
+ 401: {"model": ErrorResponse},
260
+ 500: {"model": ErrorResponse},
261
+ },
262
+ )
263
+ async def update_preferences(
264
+ preferences: UserPreferencesUpdate,
265
+ current_user: Dict[str, Any] = Depends(get_current_user),
266
+ ):
267
+ """Update user preferences with validation"""
268
+ try:
269
+ # Validate preferences
270
+ try:
271
+ UserPreferences(**preferences.preferences)
272
+ except Exception as e:
273
+ raise HTTPException(
274
+ status_code=status.HTTP_400_BAD_REQUEST,
275
+ detail=f"Invalid preferences: {str(e)}",
276
+ )
277
+
278
+ success = await asyncio.wait_for(
279
+ user_profile.update_profile(
280
+ current_user["user_id"], {"preferences": preferences.preferences}
281
+ ),
282
+ timeout=settings.PROFILE_TIMEOUT,
283
+ )
284
+
285
+ if not success:
286
+ raise HTTPException(
287
+ status_code=status.HTTP_400_BAD_REQUEST,
288
+ detail="Failed to update preferences",
289
+ )
290
+ return {"message": "Preferences updated successfully"}
291
+ except asyncio.TimeoutError:
292
+ raise HTTPException(
293
+ status_code=status.HTTP_504_GATEWAY_TIMEOUT, detail="Request timed out"
294
+ )
295
+ except HTTPException:
296
+ raise
297
+ except Exception as e:
298
+ logger.error(f"Error updating preferences: {str(e)}", exc_info=True)
299
+ raise HTTPException(
300
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
301
+ detail="Error updating preferences",
302
+ )
303
+
304
+ @app.get("/health", responses={500: {"model": ErrorResponse}})
305
+ async def health_check():
306
+ """Health check endpoint with detailed status"""
307
+ try:
308
+ # Check core components
309
+ if not rag_engine or not user_profile:
310
+ raise HTTPException(
311
+ status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
312
+ detail="Core components not initialized",
313
+ )
314
+
315
+ return {
316
+ "status": "healthy",
317
+ "timestamp": datetime.utcnow().isoformat(),
318
+ "version": settings.VERSION,
319
+ "environment": settings.DEBUG, # Use DEBUG setting for environment
320
+ "components": {
321
+ "rag_engine": "ok",
322
+ "user_profile": "ok",
323
+ },
324
+ }
325
+ except Exception as e:
326
+ logger.error(f"Health check failed: {str(e)}", exc_info=True)
327
+ raise HTTPException(
328
+ status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Service unhealthy"
329
+ )
330
+
331
+ @app.on_event("shutdown")
332
+ async def shutdown_event():
333
+ """Cleanup on shutdown"""
334
+ await cleanup()
335
+
336
+ if __name__ == "__main__":
337
+ import uvicorn
338
+
339
+ uvicorn.run(
340
+ "api.main:app",
341
+ host="0.0.0.0",
342
+ port=int(os.getenv("PORT", 7860)),
343
+ reload=False, # Set reload to False for production
344
+ )
api/schemas.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Dict, Any, Optional
3
+ from datetime import datetime
4
+
5
+
6
+ class ChatRequest(BaseModel):
7
+ message: str = Field(..., min_length=1, max_length=1000)
8
+ chat_history: List[Dict[str, str]] = Field(default_factory=list)
9
+
10
+
11
+ class Source(BaseModel):
12
+ title: str
13
+ url: str
14
+ relevance_score: float
15
+
16
+
17
+ class ChatResponse(BaseModel):
18
+ answer: str
19
+ sources: List[Source] = Field(default_factory=list)
20
+ suggested_questions: List[str] = Field(default_factory=list)
21
+ processing_time: Optional[float] = None
22
+
23
+
24
+ class UserPreferences(BaseModel):
25
+ favorite_destinations: List[str] = Field(default_factory=list)
26
+ travel_style: str = Field(default="balanced")
27
+ preferred_seasons: List[str] = Field(default_factory=list)
28
+ interests: List[str] = Field(default_factory=list)
29
+ dietary_restrictions: List[str] = Field(default_factory=list)
30
+ accessibility_needs: List[str] = Field(default_factory=list)
31
+ language: str = Field(default="en")
32
+ currency: str = Field(default="USD")
33
+ temperature_unit: str = Field(default="C")
34
+ timezone: str = Field(default="UTC")
35
+
36
+
37
+ class UserProfileResponse(BaseModel):
38
+ user_id: str
39
+ preferences: UserPreferences
40
+ created_at: datetime
41
+ updated_at: datetime
42
+
43
+
44
+ class UserPreferencesUpdate(BaseModel):
45
+ favorite_destinations: Optional[List[str]] = None
46
+ travel_style: Optional[str] = None
47
+ preferred_seasons: Optional[List[str]] = None
48
+ interests: Optional[List[str]] = None
49
+ dietary_restrictions: Optional[List[str]] = None
50
+ accessibility_needs: Optional[List[str]] = None
51
+ language: Optional[str] = None
52
+ currency: Optional[str] = None
53
+ temperature_unit: Optional[str] = None
54
+ timezone: Optional[str] = None
55
+
56
+
57
+ class ErrorResponse(BaseModel):
58
+ error: str
59
+ detail: Optional[str] = None
60
+ timestamp: str = Field(default_factory=lambda: datetime.utcnow().isoformat())
61
+ request_id: Optional[str] = None
app.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import logging
4
+ import uuid
5
+ from typing import List, Dict, Any, Tuple
6
+ from logging.handlers import RotatingFileHandler
7
+
8
+ import gradio as gr
9
+ from tenacity import retry, stop_after_attempt, wait_exponential
10
+
11
+ from core.rag_engine import RAGEngine
12
+ from core.user_profile import UserProfile
13
+ from config.config import settings
14
+
15
+ # ======================================================================================
16
+ # Logging Setup
17
+ # ======================================================================================
18
+
19
+ os.makedirs("logs", exist_ok=True)
20
+
21
+ logging.basicConfig(
22
+ level=getattr(logging, settings.LOG_LEVEL.upper(), logging.INFO),
23
+ format=settings.LOG_FORMAT,
24
+ handlers=[
25
+ logging.StreamHandler(sys.stdout),
26
+ RotatingFileHandler(
27
+ settings.LOG_FILE_PATH,
28
+ maxBytes=settings.LOG_FILE_MAX_BYTES,
29
+ backupCount=settings.LOG_FILE_BACKUP_COUNT
30
+ ),
31
+ ],
32
+ )
33
+ logger = logging.getLogger(__name__)
34
+
35
+ # ======================================================================================
36
+ # Core Module Initialization
37
+ # ======================================================================================
38
+
39
+ @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
40
+ def initialize_with_retry(func):
41
+ """Initializes a component with retry logic."""
42
+ try:
43
+ return func()
44
+ except Exception as e:
45
+ logger.error(f"Initialization failed: {e}", exc_info=True)
46
+ raise
47
+
48
+ try:
49
+ user_profile = initialize_with_retry(UserProfile)
50
+ rag_engine = initialize_with_retry(lambda: RAGEngine(user_profile=user_profile))
51
+ logger.info("Core modules initialized successfully.")
52
+ except Exception as e:
53
+ logger.critical(f"Fatal: Could not initialize core modules: {e}. Exiting.", exc_info=True)
54
+ sys.exit(1)
55
+
56
+ # ======================================================================================
57
+ # Business Logic
58
+ # ======================================================================================
59
+
60
+ async def handle_chat_interaction(
61
+ message: str, chat_history: List[List[str]], user_id: str, categories: List[str]
62
+ ) -> List[List[str]]:
63
+ """Handles the user's chat message, processes it, and updates the history."""
64
+ if not message.strip():
65
+ gr.Warning("Message cannot be empty. Please type a question.")
66
+ return chat_history
67
+
68
+ try:
69
+ profile = user_profile.get_profile(user_id)
70
+ profile["preferences"]["favorite_categories"] = categories
71
+ user_profile.update_profile(user_id, profile)
72
+ logger.info(f"Updated preferences for user {user_id}: {categories}")
73
+
74
+ result = await rag_engine.process_query(query=message, user_id=user_id)
75
+ response = result.get("answer", "Sorry, I could not find an answer.")
76
+
77
+ sources = result.get("sources")
78
+ if sources:
79
+ response += "\n\n**Sources:**\n" + format_sources(sources)
80
+
81
+ chat_history.append((message, response))
82
+ logger.info(f"User {user_id} received response.")
83
+ return chat_history
84
+
85
+ except Exception as e:
86
+ error_message = f"An unexpected error occurred: {str(e)}"
87
+ logger.error(f"Error for user {user_id}: {error_message}", exc_info=True)
88
+ gr.Warning("Sorry, I encountered a problem. Please try again or rephrase your question.")
89
+ return chat_history
90
+
91
+ def format_sources(sources: List[Dict[str, Any]]) -> str:
92
+ """Formats the source documents into a readable string."""
93
+ if not sources:
94
+ return ""
95
+ formatted_list = [f"- **{source.get('title', 'Unknown Source')}** (Category: {source.get('category', 'N/A')})" for source in sources]
96
+ return "\n".join(formatted_list)
97
+
98
+ # ======================================================================================
99
+ # Gradio UI Definition
100
+ # ======================================================================================
101
+
102
+ def handle_slider_change(value: int) -> None:
103
+ """
104
+ Handles the change event for the document loader slider.
105
+ Note: This currently only shows a notification. A restart is required.
106
+ """
107
+ gr.Info(f"Document limit set to {int(value)}. Please restart the app for changes to take effect.")
108
+
109
+
110
+ def create_interface() -> gr.Blocks:
111
+ """Creates and configures the Gradio web interface."""
112
+
113
+ with gr.Blocks(
114
+ title="TravelMate - Your AI Travel Assistant",
115
+ theme=gr.themes.Base(),
116
+ ) as demo:
117
+
118
+ user_id = gr.State(lambda: str(uuid.uuid4()))
119
+
120
+ gr.Markdown("""
121
+ <div style="text-align: center;">
122
+ <h1 style="font-size: 2.5em;">✈️ TravelMate</h1>
123
+ <p style="font-size: 1.1em; color: #333;">Your AI-powered travel assistant. Ask me anything to plan your next trip!</p>
124
+ </div>
125
+ """)
126
+
127
+ with gr.Accordion("Advanced Settings", open=False):
128
+ doc_load_slider = gr.Slider(
129
+ minimum=100,
130
+ maximum=5000,
131
+ value=settings.MAX_DOCUMENTS_TO_LOAD,
132
+ step=100,
133
+ label="Documents to Load",
134
+ info="Controls how many documents are loaded for the RAG engine. Higher values may increase startup time.",
135
+ )
136
+ doc_load_slider.change(
137
+ fn=handle_slider_change, inputs=[doc_load_slider], outputs=None
138
+ )
139
+
140
+ with gr.Row():
141
+ with gr.Column(scale=2):
142
+ chatbot = gr.Chatbot(
143
+ elem_id="chatbot",
144
+ label="TravelMate Chat",
145
+ height=600,
146
+ show_label=False,
147
+ show_copy_button=True,
148
+ bubble_full_width=False,
149
+ avatar_images=("assets/user_avatar.png", "assets/bot_avatar.png"),
150
+ )
151
+ with gr.Row():
152
+ msg = gr.Textbox(
153
+ placeholder="Ask me about destinations, flights, hotels...",
154
+ show_label=False,
155
+ container=False,
156
+ scale=8,
157
+ )
158
+ submit_btn = gr.Button("Send", variant="primary", scale=1)
159
+
160
+ with gr.Column(scale=1):
161
+ gr.Markdown("### Select Your Interests")
162
+ categories = gr.CheckboxGroup(
163
+ choices=[
164
+ "Flights", "Hotels", "Destinations", "Activities",
165
+ "Transportation", "Food & Dining", "Shopping",
166
+ "Health & Safety", "Budget Planning"
167
+ ],
168
+ value=["Flights", "Hotels"],
169
+ label="Travel Categories",
170
+ )
171
+ gr.Markdown("### Example Questions")
172
+ gr.Examples(
173
+ examples=[
174
+ "What are the best places to visit in Japan?",
175
+ "How do I find cheap flights to Europe?",
176
+ "What should I pack for a beach vacation?",
177
+ "Tell me about local customs in Thailand",
178
+ "What's the best time to visit Paris?",
179
+ ],
180
+ inputs=msg,
181
+ )
182
+
183
+ async def on_submit(message: str, history: List[List[str]], uid: str, cats: List[str]) -> Tuple[str, List[List[str]]]:
184
+ """Handles submission and returns updated values for the message box and chatbot."""
185
+ updated_history = await handle_chat_interaction(message, history, uid, cats)
186
+ return "", updated_history
187
+
188
+ submit_btn.click(on_submit, [msg, chatbot, user_id, categories], [msg, chatbot])
189
+ msg.submit(on_submit, [msg, chatbot, user_id, categories], [msg, chatbot])
190
+
191
+ return demo
192
+
193
+ # ======================================================================================
194
+ # Application Launch
195
+ # ======================================================================================
196
+
197
+ if __name__ == "__main__":
198
+ try:
199
+ app = create_interface()
200
+ app.queue(default_concurrency_limit=settings.GRADIO_CONCURRENCY_COUNT)
201
+ app.launch(
202
+ server_name=settings.GRADIO_SERVER_NAME,
203
+ server_port=settings.GRADIO_SERVER_PORT,
204
+ share=settings.GRADIO_SHARE,
205
+ show_error=True,
206
+ show_api=False,
207
+ )
208
+ except Exception as e:
209
+ logger.critical(f"Failed to launch Gradio app: {e}", exc_info=True)
210
+ raise
config/config.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ from pydantic import model_validator
6
+ from pydantic_settings import BaseSettings
7
+
8
+ class Settings(BaseSettings):
9
+ """Manages all application settings using Pydantic for robust configuration.
10
+
11
+ Attributes:
12
+ BASE_DIR (Path): The root directory of the project.
13
+ DATA_DIR (Path): The directory for storing data.
14
+ # ... other attributes
15
+ """
16
+ # ----------------------------------------------------------------------------------
17
+ # Path Settings
18
+ # ----------------------------------------------------------------------------------
19
+ BASE_DIR: Path = Path(__file__).resolve().parent.parent
20
+ DATA_DIR: Path = BASE_DIR / "data"
21
+ VECTOR_STORE_DIR: Path = DATA_DIR / "vector_store"
22
+ USER_PROFILES_DIR: Path = DATA_DIR / "user_profiles"
23
+ CACHE_DIR: Path = DATA_DIR / "cache"
24
+ LOGS_DIR: Path = BASE_DIR / "logs"
25
+
26
+ # ----------------------------------------------------------------------------------
27
+ # Model & RAG Settings
28
+ # ----------------------------------------------------------------------------------
29
+ MODEL_NAME: str = "google/gemma-2b-it"
30
+ EMBEDDING_MODEL_NAME: str = os.getenv("EMBEDDING_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2")
31
+ DATASET_ID: str = "bitext/Bitext-travel-llm-chatbot-training-dataset"
32
+ HUGGINGFACE_API_TOKEN: Optional[str] = os.getenv("HUGGINGFACE_API_TOKEN")
33
+
34
+ # RAG pipeline settings
35
+ CHUNK_SIZE: int = 1000
36
+ CHUNK_OVERLAP: int = 100
37
+ MAX_DOCUMENTS_TO_LOAD: int = 50 # Drastically reduced for performance
38
+ TOP_K_RESULTS: int = 3
39
+ SIMILARITY_THRESHOLD: float = 0.7
40
+
41
+ # Model configuration for HuggingFaceEndpoint
42
+ TEMPERATURE: float = 0.7
43
+ MAX_NEW_TOKENS: int = 512 # Drastically reduced to combat latency
44
+ REPETITION_PENALTY: float = 1.2
45
+
46
+ # Production-Grade Prompt Template
47
+ QA_PROMPT_TEMPLATE: str = """You are TravelMate, an expert AI travel assistant.
48
+ Use the following context to answer the user's question concisely and helpfully.
49
+ If you don't know the answer, simply say that you don't know. Do not make up information.
50
+
51
+ Context:
52
+ {context}
53
+
54
+ Question: {input}
55
+
56
+ Answer:"""
57
+
58
+ # Cache settings
59
+ MAX_CACHE_SIZE: int = 1000
60
+ CACHE_TTL: int = 3600 # Time-to-live in seconds (1 hour)
61
+
62
+ # ----------------------------------------------------------------------------------
63
+ # Application Behavior Settings
64
+ # ----------------------------------------------------------------------------------
65
+ QUERY_TIMEOUT: int = 30 # seconds
66
+ MAX_MESSAGE_LENGTH: int = 500 # characters
67
+ MAX_CHAT_HISTORY: int = 20 # messages
68
+
69
+ # ----------------------------------------------------------------------------------
70
+ # API & Security Settings
71
+ # ----------------------------------------------------------------------------------
72
+ API_V1_STR: str = "/api/v1"
73
+ PROJECT_NAME: str = "TravelMate AI Assistant"
74
+ VERSION: str = "1.0.0"
75
+ DEBUG: bool = False
76
+ SECRET_KEY: str = os.getenv("SECRET_KEY", "a_very_secret_key")
77
+ JWT_SECRET_KEY: str = os.getenv("JWT_SECRET_KEY", "a_very_secret_jwt_key")
78
+ ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 24 # 24 hours
79
+
80
+ # ----------------------------------------------------------------------------------
81
+ # Logging Settings
82
+ # ----------------------------------------------------------------------------------
83
+ LOG_LEVEL: str = "INFO"
84
+ LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
85
+ LOG_FILE_PATH: Path = LOGS_DIR / "app.log"
86
+ LOG_FILE_MAX_BYTES: int = 10 * 1024 * 1024 # 10MB
87
+ LOG_FILE_BACKUP_COUNT: int = 5
88
+
89
+ # ----------------------------------------------------------------------------------
90
+ # Gradio UI Settings
91
+ # ----------------------------------------------------------------------------------
92
+ GRADIO_SERVER_NAME: str = "0.0.0.0"
93
+ GRADIO_SERVER_PORT: int = 7860
94
+ GRADIO_SHARE: bool = True
95
+ GRADIO_CONCURRENCY_COUNT: int = 5
96
+
97
+ class Config:
98
+ env_file = ".env"
99
+ case_sensitive = True
100
+
101
+ @model_validator(mode='after')
102
+ def create_directories(self) -> 'Settings':
103
+ """Ensures that necessary directories exist upon settings initialization."""
104
+ self.DATA_DIR.mkdir(parents=True, exist_ok=True)
105
+ self.VECTOR_STORE_DIR.mkdir(parents=True, exist_ok=True)
106
+ self.USER_PROFILES_DIR.mkdir(parents=True, exist_ok=True)
107
+ self.CACHE_DIR.mkdir(parents=True, exist_ok=True)
108
+ self.LOGS_DIR.mkdir(parents=True, exist_ok=True)
109
+ return self
110
+
111
+
112
+ settings = Settings()
core/data_loader.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import logging
4
+ import stat
5
+ import time
6
+ from typing import Any, List
7
+
8
+ from config.config import settings
9
+ from datasets import load_dataset
10
+ from langchain.schema import Document
11
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
12
+ from langchain_community.vectorstores import FAISS
13
+ from langchain_community.embeddings import HuggingFaceEmbeddings
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class DataLoader:
19
+ """Handles loading and processing of data for the RAG engine."""
20
+
21
+ def __init__(self):
22
+ """Initialize the data loader."""
23
+ self.data_dir = os.path.abspath("data")
24
+ self.travel_guides_path = os.path.join(self.data_dir, "travel_guides.json")
25
+ self.vector_store_path = os.path.join(self.data_dir, "vector_store", "faiss_index")
26
+ self._ensure_data_directories()
27
+ self._set_directory_permissions()
28
+
29
+ self.text_splitter = RecursiveCharacterTextSplitter(
30
+ chunk_size=settings.CHUNK_SIZE,
31
+ chunk_overlap=settings.CHUNK_OVERLAP,
32
+ length_function=len,
33
+ separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
34
+ )
35
+ self.max_file_size = 10 * 1024 * 1024 # 10MB
36
+
37
+ def _ensure_data_directories(self):
38
+ """Ensure necessary data directories exist."""
39
+ os.makedirs(self.data_dir, exist_ok=True)
40
+ os.makedirs(os.path.dirname(self.vector_store_path), exist_ok=True)
41
+ os.makedirs(os.path.join(self.data_dir, "cache"), exist_ok=True)
42
+
43
+ def _set_directory_permissions(self):
44
+ """Set secure permissions for data directories (755)."""
45
+ try:
46
+ for dir_path in [
47
+ self.data_dir,
48
+ os.path.dirname(self.vector_store_path),
49
+ os.path.join(self.data_dir, "cache"),
50
+ ]:
51
+ os.chmod(
52
+ dir_path,
53
+ stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH,
54
+ )
55
+ except Exception as e:
56
+ logger.error(f"Error setting directory permissions: {e}", exc_info=True)
57
+
58
+ def _validate_file_permissions(self, file_path: str) -> bool:
59
+ """Validate file permissions to ensure security."""
60
+ try:
61
+ if not os.path.exists(file_path):
62
+ return False
63
+ file_stat = os.stat(file_path)
64
+ if file_stat.st_mode & stat.S_IWOTH: # Disallow world-writable
65
+ logger.warning(f"File {file_path} is world-writable. Skipping.")
66
+ return False
67
+ if file_stat.st_size > self.max_file_size:
68
+ logger.warning(f"File {file_path} exceeds size limit. Skipping.")
69
+ return False
70
+ return True
71
+ except Exception as e:
72
+ logger.error(f"Error validating file permissions for {file_path}: {e}", exc_info=True)
73
+ return False
74
+
75
+ def _load_dataset_with_retry(self, max_retries: int = 3) -> Any:
76
+ """Load dataset from Hugging Face with an exponential backoff retry mechanism."""
77
+ for attempt in range(max_retries):
78
+ try:
79
+ return load_dataset(
80
+ settings.DATASET_ID,
81
+ split="train",
82
+ cache_dir=os.path.join(self.data_dir, "cache"),
83
+ )
84
+ except Exception as e:
85
+ logger.warning(f"Dataset loading attempt {attempt + 1} failed: {e}")
86
+ if attempt == max_retries - 1:
87
+ logger.error("All attempts to load dataset failed.")
88
+ return None
89
+ time.sleep(2 ** attempt)
90
+ return None
91
+
92
+ def load_documents(self) -> List[Document]:
93
+ """Load and process all documents for the knowledge base."""
94
+ documents = []
95
+ try:
96
+ # 1. Load Bitext Travel Dataset
97
+ logger.info(f"Loading dataset: {settings.DATASET_ID}")
98
+ dataset = self._load_dataset_with_retry()
99
+ if dataset:
100
+ max_docs = settings.MAX_DOCUMENTS_TO_LOAD
101
+ logger.info(f"Loading up to {max_docs} documents from the dataset.")
102
+ for i, item in enumerate(dataset):
103
+ if i >= max_docs:
104
+ logger.info(f"Reached document limit ({max_docs}).")
105
+ break
106
+ instruction = item.get("instruction")
107
+ response = item.get("response")
108
+
109
+ if not instruction or not response:
110
+ logger.warning(f"Skipping item with missing instruction or response: {item}")
111
+ continue
112
+
113
+ page_content = f"User query: {instruction}\n\nChatbot response: {response}"
114
+ metadata = {
115
+ "source": "huggingface",
116
+ "intent": item.get("intent"),
117
+ "category": item.get("category"),
118
+ "tags": item.get("tags"),
119
+ }
120
+
121
+ documents.append(Document(page_content=page_content, metadata=metadata))
122
+
123
+ # 2. Load Local Travel Guides
124
+ logger.info("Loading local travel guides...")
125
+ if os.path.exists(self.travel_guides_path) and self._validate_file_permissions(self.travel_guides_path):
126
+ with open(self.travel_guides_path, "r", encoding="utf-8") as f:
127
+ guides = json.load(f)
128
+ for guide in guides:
129
+ if not all(k in guide for k in ["title", "content", "category"]):
130
+ logger.warning(f"Skipping malformed guide: {guide}")
131
+ continue
132
+ doc = Document(
133
+ page_content=guide["content"],
134
+ metadata={
135
+ "title": guide["title"],
136
+ "category": guide["category"],
137
+ "source": "travel_guide",
138
+ },
139
+ )
140
+ documents.append(doc)
141
+ else:
142
+ logger.info("Travel guides file not found or invalid. Skipping.")
143
+
144
+ logger.info(f"Loaded {len(documents)} documents in total.")
145
+ return documents
146
+
147
+ except Exception as e:
148
+ logger.error(f"A critical error occurred while loading documents: {e}", exc_info=True)
149
+ return []
150
+
151
+ def create_vector_store(self, documents: List[Document]):
152
+ """Create a FAISS vector store from documents."""
153
+ try:
154
+ logger.info("Creating vector store...")
155
+ embeddings = HuggingFaceEmbeddings(
156
+ model_name=settings.EMBEDDING_MODEL_NAME,
157
+ model_kwargs={"device": "cpu"},
158
+ encode_kwargs={"normalize_embeddings": True},
159
+ )
160
+
161
+ split_docs = self.text_splitter.split_documents(documents)
162
+
163
+ vector_store = FAISS.from_documents(
164
+ documents=split_docs,
165
+ embedding=embeddings,
166
+ )
167
+ vector_store.save_local(self.vector_store_path)
168
+ logger.info(f"Vector store created and saved to {self.vector_store_path} with {len(split_docs)} chunks.")
169
+ except Exception as e:
170
+ logger.error(f"Error creating vector store: {e}", exc_info=True)
171
+ raise
172
+
173
+ def initialize_knowledge_base(self):
174
+ """Initialize the complete knowledge base."""
175
+ try:
176
+ logger.info("Initializing knowledge base...")
177
+ documents = self.load_documents()
178
+ if not documents:
179
+ logger.error("No documents were loaded. Aborting knowledge base initialization.")
180
+ return
181
+ self.create_vector_store(documents)
182
+ logger.info("Knowledge base initialized successfully.")
183
+ except Exception as e:
184
+ logger.critical(f"Failed to initialize knowledge base: {e}", exc_info=True)
185
+ raise
core/rag_engine.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import List, Dict, Any
3
+
4
+ from langchain.prompts import PromptTemplate
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ from langchain.chains import create_retrieval_chain
9
+ from langchain.chains.combine_documents import create_stuff_documents_chain
10
+ from langchain_core.documents import Document
11
+ from expiringdict import ExpiringDict
12
+
13
+ from core.data_loader import DataLoader
14
+ from core.user_profile import UserProfile
15
+ from config.config import settings
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ class RAGEngine:
20
+ """
21
+ The core Retrieval-Augmented Generation engine for the TravelMate chatbot.
22
+ This class handles model initialization, vector store creation, and query processing.
23
+ """
24
+
25
+ def __init__(self, user_profile: UserProfile):
26
+ """
27
+ Initializes the RAG engine, loading models and setting up the QA chain.
28
+ """
29
+ self.user_profile = user_profile
30
+ self.query_cache = ExpiringDict(max_len=settings.MAX_CACHE_SIZE, max_age_seconds=settings.CACHE_TTL)
31
+
32
+ try:
33
+ self.embeddings = self._initialize_embeddings()
34
+ self.vector_store = self._initialize_vector_store()
35
+ self.llm = self._initialize_llm()
36
+ self.qa_chain = self._create_rag_chain()
37
+ logger.info("RAG Engine initialized successfully.")
38
+ except Exception as e:
39
+ logger.critical(f"Failed to initialize RAG Engine: {e}", exc_info=True)
40
+ raise
41
+
42
+ def _initialize_embeddings(self) -> HuggingFaceEmbeddings:
43
+ """Initializes the sentence-transformer embeddings model."""
44
+ return HuggingFaceEmbeddings(
45
+ model_name=settings.EMBEDDING_MODEL_NAME,
46
+ model_kwargs={'device': 'cpu'}
47
+ )
48
+
49
+ def _initialize_vector_store(self) -> FAISS:
50
+ """
51
+ Initializes the FAISS vector store.
52
+ Loads from disk if it exists, otherwise creates it from the data loader.
53
+ """
54
+ if settings.VECTOR_STORE_DIR.exists() and any(settings.VECTOR_STORE_DIR.iterdir()):
55
+ logger.info(f"Loading existing vector store from {settings.VECTOR_STORE_DIR}...")
56
+ return FAISS.load_local(
57
+ folder_path=str(settings.VECTOR_STORE_DIR),
58
+ embeddings=self.embeddings,
59
+ allow_dangerous_deserialization=True
60
+ )
61
+ else:
62
+ logger.info("Creating new vector store from scratch.")
63
+ data_loader = DataLoader()
64
+ documents = data_loader.load_documents()
65
+
66
+ if not documents:
67
+ raise ValueError("No documents were loaded. Cannot create vector store.")
68
+
69
+ vector_store = FAISS.from_documents(documents, self.embeddings)
70
+ logger.info(f"Saving new vector store to {settings.VECTOR_STORE_DIR}...")
71
+ vector_store.save_local(str(settings.VECTOR_STORE_DIR))
72
+ return vector_store
73
+
74
+ def _initialize_llm(self) -> HuggingFaceEndpoint:
75
+ """Initializes the Hugging Face Inference Endpoint for the LLM."""
76
+ if not settings.HUGGINGFACE_API_TOKEN:
77
+ raise ValueError("HUGGINGFACE_API_TOKEN is not set.")
78
+
79
+ return HuggingFaceEndpoint(
80
+ repo_id=settings.MODEL_NAME,
81
+ huggingfacehub_api_token=settings.HUGGINGFACE_API_TOKEN,
82
+ temperature=settings.TEMPERATURE,
83
+ max_new_tokens=settings.MAX_NEW_TOKENS,
84
+ repetition_penalty=settings.REPETITION_PENALTY,
85
+ )
86
+
87
+ def _create_rag_chain(self):
88
+ """Creates a modern, streamlined RAG chain for question answering."""
89
+ qa_prompt = PromptTemplate.from_template(settings.QA_PROMPT_TEMPLATE)
90
+
91
+ question_answer_chain = create_stuff_documents_chain(self.llm, qa_prompt)
92
+
93
+ retriever = self.vector_store.as_retriever(
94
+ search_type="similarity_score_threshold",
95
+ search_kwargs={'k': settings.TOP_K_RESULTS, 'score_threshold': settings.SIMILARITY_THRESHOLD}
96
+ )
97
+
98
+ rag_chain = create_retrieval_chain(retriever, question_answer_chain)
99
+ return rag_chain
100
+
101
+ def _format_sources(self, sources: List[Document]) -> List[Dict[str, Any]]:
102
+ """Formats source documents into a serializable list of dictionaries."""
103
+ if not sources:
104
+ return []
105
+
106
+ formatted_list = []
107
+ for source in sources:
108
+ metadata = source.metadata
109
+ source_name = metadata.get('source', 'Unknown Source')
110
+
111
+ if source_name == 'huggingface':
112
+ title = f"Dataset: {metadata.get('intent', 'N/A')}"
113
+ category = metadata.get('category', 'N/A')
114
+ elif source_name == 'local_guides':
115
+ title = f"Guide: {metadata.get('title', 'N/A')}"
116
+ category = metadata.get('category', 'N/A')
117
+ else:
118
+ title = "Unknown Source"
119
+ category = "N/A"
120
+
121
+ formatted_list.append({"title": title, "category": category})
122
+
123
+ return formatted_list
124
+
125
+ async def process_query(self, query: str, user_id: str) -> Dict[str, Any]:
126
+ """Processes a user query asynchronously using the streamlined RAG chain."""
127
+ cache_key = f"{user_id}:{query}"
128
+ if cache_key in self.query_cache:
129
+ logger.info(f"Returning cached response for query: {query}")
130
+ return self.query_cache[cache_key]
131
+
132
+ logger.info(f"Processing query for user {user_id}: {query}")
133
+
134
+ # The new chain expects 'input' instead of 'question'
135
+ chain_input = {"input": query}
136
+
137
+ try:
138
+ result = await self.qa_chain.ainvoke(chain_input)
139
+
140
+ answer = result.get("answer", "Sorry, I couldn't find an answer.")
141
+ # The new chain returns retrieved documents in the 'context' key
142
+ sources = self._format_sources(result.get("context", []))
143
+
144
+ response = {"answer": answer, "sources": sources}
145
+ self.query_cache[cache_key] = response
146
+
147
+ logger.info(f"Successfully processed query for user {user_id}")
148
+ return response
149
+ except Exception as e:
150
+ logger.error(f"Error processing query for user {user_id}: {e}", exc_info=True)
151
+ return {"answer": "I'm sorry, but I encountered an error while processing your request.", "sources": []}
core/user_profile.py ADDED
@@ -0,0 +1,464 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, Optional, List
2
+ import json
3
+ import os
4
+ from datetime import datetime, timedelta
5
+ import logging
6
+ from pydantic import BaseModel, Field, validator
7
+ from enum import Enum
8
+ import time
9
+ import re
10
+ from config.config import settings
11
+ import threading
12
+ import shutil
13
+ from pathlib import Path
14
+ import hashlib
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Global lock for profile operations
19
+ _profile_lock = threading.Lock()
20
+
21
+
22
+ class TravelStyle(str, Enum):
23
+ BUDGET = "budget"
24
+ LUXURY = "luxury"
25
+ BALANCED = "balanced"
26
+
27
+
28
+ class UserPreferences(BaseModel):
29
+ """User preferences model with validation."""
30
+
31
+ travel_style: str = Field(
32
+ default="balanced",
33
+ description="Preferred travel style (budget, luxury, balanced)",
34
+ )
35
+ preferred_destinations: list = Field(
36
+ default_factory=list,
37
+ description="List of preferred travel destinations",
38
+ )
39
+ dietary_restrictions: list = Field(
40
+ default_factory=list,
41
+ description="List of dietary restrictions",
42
+ )
43
+ accessibility_needs: list = Field(
44
+ default_factory=list,
45
+ description="List of accessibility requirements",
46
+ )
47
+ preferred_activities: list = Field(
48
+ default_factory=list,
49
+ description="List of preferred activities",
50
+ )
51
+ budget_range: Dict[str, float] = Field(
52
+ default_factory=lambda: {"min": 0, "max": float("inf")},
53
+ description="Budget range for travel",
54
+ )
55
+ preferred_accommodation: str = Field(
56
+ default="hotel",
57
+ description="Preferred type of accommodation",
58
+ )
59
+ preferred_transportation: str = Field(
60
+ default="flexible",
61
+ description="Preferred mode of transportation",
62
+ )
63
+ travel_frequency: str = Field(
64
+ default="occasional",
65
+ description="How often the user travels",
66
+ )
67
+ preferred_seasons: list = Field(
68
+ default_factory=list,
69
+ description="Preferred travel seasons",
70
+ )
71
+ special_requirements: list = Field(
72
+ default_factory=list,
73
+ description="Any special travel requirements",
74
+ )
75
+
76
+ @validator("travel_style")
77
+ def validate_travel_style(cls, v):
78
+ allowed_styles = ["budget", "luxury", "balanced"]
79
+ if v not in allowed_styles:
80
+ raise ValueError(f"Travel style must be one of {allowed_styles}")
81
+ return v
82
+
83
+ @validator("preferred_accommodation")
84
+ def validate_accommodation(cls, v):
85
+ allowed_types = [
86
+ "hotel",
87
+ "hostel",
88
+ "apartment",
89
+ "resort",
90
+ "camping",
91
+ "flexible",
92
+ ]
93
+ if v not in allowed_types:
94
+ raise ValueError(f"Accommodation type must be one of {allowed_types}")
95
+ return v
96
+
97
+ @validator("preferred_transportation")
98
+ def validate_transportation(cls, v):
99
+ allowed_types = [
100
+ "car",
101
+ "train",
102
+ "bus",
103
+ "plane",
104
+ "flexible",
105
+ ]
106
+ if v not in allowed_types:
107
+ raise ValueError(f"Transportation type must be one of {allowed_types}")
108
+ return v
109
+
110
+ @validator("travel_frequency")
111
+ def validate_frequency(cls, v):
112
+ allowed_frequencies = [
113
+ "rarely",
114
+ "occasional",
115
+ "frequent",
116
+ "very_frequent",
117
+ ]
118
+ if v not in allowed_frequencies:
119
+ raise ValueError(f"Travel frequency must be one of {allowed_frequencies}")
120
+ return v
121
+
122
+ @validator("budget_range")
123
+ def validate_budget(cls, v):
124
+ if v["min"] < 0:
125
+ raise ValueError("Minimum budget cannot be negative")
126
+ if v["max"] < v["min"]:
127
+ raise ValueError("Maximum budget must be greater than minimum budget")
128
+ return v
129
+
130
+
131
+ class UserProfile:
132
+ def __init__(self):
133
+ """Initialize the user profile manager."""
134
+ self.profiles_dir = os.path.join("data", "user_profiles")
135
+ self.backup_dir = os.path.join("data", "user_profiles_backup")
136
+ self._ensure_directories()
137
+ self.rate_limit_window = 3600 # 1 hour
138
+ self.max_updates_per_window = 10
139
+ self.update_history: Dict[str, list] = {}
140
+ self.max_profile_size = 1024 * 1024 # 1MB
141
+
142
+ def _ensure_directories(self):
143
+ """Ensure necessary directories exist."""
144
+ os.makedirs(self.profiles_dir, exist_ok=True)
145
+ os.makedirs(self.backup_dir, exist_ok=True)
146
+
147
+ def _validate_user_id(self, user_id: str) -> bool:
148
+ """Validate user ID format."""
149
+ if not user_id or not isinstance(user_id, str):
150
+ return False
151
+ # Allow alphanumeric characters, hyphens, and underscores
152
+ return bool(re.match(r"^[a-zA-Z0-9-_]+$", user_id))
153
+
154
+ def _check_rate_limit(self, user_id: str) -> bool:
155
+ """Check if user has exceeded rate limit."""
156
+ current_time = time.time()
157
+ if user_id not in self.update_history:
158
+ self.update_history[user_id] = []
159
+
160
+ # Remove old entries
161
+ self.update_history[user_id] = [
162
+ t
163
+ for t in self.update_history[user_id]
164
+ if current_time - t < self.rate_limit_window
165
+ ]
166
+
167
+ # Check if limit exceeded
168
+ if len(self.update_history[user_id]) >= self.max_updates_per_window:
169
+ return False
170
+
171
+ # Add new entry
172
+ self.update_history[user_id].append(current_time)
173
+ return True
174
+
175
+ def _create_backup(self, user_id: str) -> None:
176
+ """Create a backup of the user profile."""
177
+ try:
178
+ profile_path = os.path.join(self.profiles_dir, f"{user_id}.json")
179
+ if not os.path.exists(profile_path):
180
+ return
181
+
182
+ # Create backup with timestamp
183
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
184
+ backup_path = os.path.join(self.backup_dir, f"{user_id}_{timestamp}.json")
185
+ shutil.copy2(profile_path, backup_path)
186
+
187
+ # Keep only the last 5 backups
188
+ backups = sorted(
189
+ Path(self.backup_dir).glob(f"{user_id}_*.json"),
190
+ key=lambda x: x.stat().st_mtime,
191
+ reverse=True,
192
+ )
193
+ for old_backup in backups[5:]:
194
+ old_backup.unlink()
195
+
196
+ except Exception as e:
197
+ logger.error(
198
+ f"Error creating backup for {user_id}: {str(e)}", exc_info=True
199
+ )
200
+
201
+ def _cleanup_old_profiles(self):
202
+ """Clean up profiles older than 30 days."""
203
+ try:
204
+ current_time = time.time()
205
+ for filename in os.listdir(self.profiles_dir):
206
+ if not filename.endswith(".json"):
207
+ continue
208
+
209
+ file_path = os.path.join(self.profiles_dir, filename)
210
+ file_time = os.path.getmtime(file_path)
211
+
212
+ if current_time - file_time > 30 * 24 * 3600: # 30 days
213
+ try:
214
+ # Create final backup before deletion
215
+ user_id = filename[:-5] # Remove .json extension
216
+ self._create_backup(user_id)
217
+ os.remove(file_path)
218
+ logger.info(f"Removed old profile: {filename}")
219
+ except Exception as e:
220
+ logger.warning(
221
+ f"Error removing old profile {filename}: {str(e)}"
222
+ )
223
+ except Exception as e:
224
+ logger.error(f"Error cleaning up old profiles: {str(e)}", exc_info=True)
225
+
226
+ def get_profile(self, user_id: str) -> Dict[str, Any]:
227
+ """Get user profile with validation."""
228
+ try:
229
+ if not self._validate_user_id(user_id):
230
+ raise ValueError("Invalid user ID format")
231
+
232
+ profile_path = os.path.join(self.profiles_dir, f"{user_id}.json")
233
+
234
+ with _profile_lock:
235
+ if not os.path.exists(profile_path):
236
+ return self._create_default_profile(user_id)
237
+
238
+ # Check file size
239
+ if os.path.getsize(profile_path) > self.max_profile_size:
240
+ raise ValueError("Profile file size exceeds limit")
241
+
242
+ with open(profile_path, "r", encoding="utf-8") as f:
243
+ profile = json.load(f)
244
+
245
+ # Validate profile structure
246
+ if not isinstance(profile, dict):
247
+ raise ValueError("Invalid profile format")
248
+
249
+ # Ensure all required fields exist
250
+ required_fields = ["user_id", "preferences", "created_at", "updated_at"]
251
+ if not all(field in profile for field in required_fields):
252
+ raise ValueError("Missing required profile fields")
253
+
254
+ return profile
255
+
256
+ except Exception as e:
257
+ logger.error(
258
+ f"Error getting profile for {user_id}: {str(e)}", exc_info=True
259
+ )
260
+ raise
261
+
262
+ def update_profile(
263
+ self, user_id: str, preferences: Dict[str, Any]
264
+ ) -> Dict[str, Any]:
265
+ """Update user profile with validation and rate limiting."""
266
+ try:
267
+ if not self._validate_user_id(user_id):
268
+ raise ValueError("Invalid user ID format")
269
+
270
+ if not self._check_rate_limit(user_id):
271
+ raise ValueError("Rate limit exceeded")
272
+
273
+ # Validate preferences
274
+ try:
275
+ validated_preferences = UserPreferences(**preferences)
276
+ except Exception as e:
277
+ raise ValueError(f"Invalid preferences: {str(e)}")
278
+
279
+ profile_path = os.path.join(self.profiles_dir, f"{user_id}.json")
280
+
281
+ with _profile_lock:
282
+ # Create backup before update
283
+ self._create_backup(user_id)
284
+
285
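+ # Note: get_profile() below also acquires _profile_lock, so this assumes the lock is reentrant (e.g. threading.RLock); a plain threading.Lock would deadlock here.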
+ current_profile = self.get_profile(user_id)
286
+
287
+ # Update profile
288
+ current_profile["preferences"] = validated_preferences.dict()
289
+ current_profile["updated_at"] = datetime.utcnow().isoformat()
290
+
291
+ # Save updated profile
292
+ with open(profile_path, "w", encoding="utf-8") as f:
293
+ json.dump(current_profile, f, indent=2)
294
+
295
+ logger.info(f"Updated profile for user {user_id}")
296
+ return current_profile
297
+
298
+ except Exception as e:
299
+ logger.error(
300
+ f"Error updating profile for {user_id}: {str(e)}", exc_info=True
301
+ )
302
+ raise
303
+
304
+ def _create_default_profile(self, user_id: str) -> Dict[str, Any]:
305
+ """Create a default profile with validation."""
306
+ try:
307
+ if not self._validate_user_id(user_id):
308
+ raise ValueError("Invalid user ID format")
309
+
310
+ default_preferences = UserPreferences().dict()
311
+ profile = {
312
+ "user_id": user_id,
313
+ "preferences": default_preferences,
314
+ "created_at": datetime.utcnow().isoformat(),
315
+ "updated_at": datetime.utcnow().isoformat(),
316
+ }
317
+
318
+ profile_path = os.path.join(self.profiles_dir, f"{user_id}.json")
319
+
320
+ with _profile_lock:
321
+ with open(profile_path, "w", encoding="utf-8") as f:
322
+ json.dump(profile, f, indent=2)
323
+
324
+ logger.info(f"Created default profile for user {user_id}")
325
+ return profile
326
+
327
+ except Exception as e:
328
+ logger.error(
329
+ f"Error creating default profile for {user_id}: {str(e)}", exc_info=True
330
+ )
331
+ raise
332
+
333
+ def delete_profile(self, user_id: str) -> None:
334
+ """Delete user profile with validation."""
335
+ try:
336
+ if not self._validate_user_id(user_id):
337
+ raise ValueError("Invalid user ID format")
338
+
339
+ profile_path = os.path.join(self.profiles_dir, f"{user_id}.json")
340
+
341
+ with _profile_lock:
342
+ if os.path.exists(profile_path):
343
+ # Create final backup before deletion
344
+ self._create_backup(user_id)
345
+ os.remove(profile_path)
346
+ logger.info(f"Deleted profile for user {user_id}")
347
+ else:
348
+ logger.warning(f"Profile not found for user {user_id}")
349
+
350
+ except Exception as e:
351
+ logger.error(
352
+ f"Error deleting profile for {user_id}: {str(e)}", exc_info=True
353
+ )
354
+ raise
355
+
356
+ def get_recommendations(self, user_id: str) -> Dict[str, Any]:
357
+ """Get personalized recommendations based on user profile with validation."""
358
+ try:
359
+ profile = self.get_profile(user_id)
360
+ if not profile or "preferences" not in profile:
361
+ return {}
362
+
363
+ preferences = UserPreferences(**profile["preferences"])
364
+ recommendations = {
365
+ "destinations": self._get_destination_recommendations(preferences),
366
+ "activities": self._get_activity_recommendations(preferences),
367
+ "tips": self._get_personalized_tips(preferences),
368
+ "generated_at": datetime.now().isoformat(),
369
+ }
370
+
371
+ return recommendations
372
+ except Exception as e:
373
+ logger.error(f"Error getting recommendations: {str(e)}", exc_info=True)
374
+ return {}
375
+
376
+ def _get_destination_recommendations(self, profile: UserPreferences) -> List[str]:
377
+ """Get destination recommendations based on preferences."""
378
+ try:
379
+ recommendations = []
380
+
381
+ # Add recommendations based on favorite destinations
382
+ if profile.preferred_destinations:
383
+ recommendations.extend(profile.preferred_destinations[:3])
384
+
385
+ # Add recommendations based on interests
386
+ if "beach" in profile.preferred_activities:
387
+ recommendations.append("Bali, Indonesia")
388
+ if "culture" in profile.preferred_activities:
389
+ recommendations.append("Kyoto, Japan")
390
+ if "food" in profile.preferred_activities:
391
+ recommendations.append("Bangkok, Thailand")
392
+
393
+ # Add recommendations based on travel style
394
+ if profile.travel_style == TravelStyle.LUXURY:
395
+ recommendations.append("Dubai, UAE")
396
+ elif profile.travel_style == TravelStyle.BUDGET:
397
+ recommendations.append("Bangkok, Thailand")
398
+
399
+ return list(set(recommendations))[:5] # Return top 5 unique recommendations
400
+ except Exception as e:
401
+ logger.error(
402
+ f"Error getting destination recommendations: {str(e)}", exc_info=True
403
+ )
404
+ return []
405
+
406
+ def _get_activity_recommendations(self, profile: UserPreferences) -> List[str]:
407
+ """Get activity recommendations based on preferences."""
408
+ try:
409
+ activities = []
410
+
411
+ # Add activities based on interests
412
+ if "culture" in profile.preferred_activities:
413
+ activities.append("Visit local museums and historical sites")
414
+ if "food" in profile.preferred_activities:
415
+ activities.append("Try local cuisine and food tours")
416
+ if "nature" in profile.preferred_activities:
417
+ activities.append("Explore national parks and hiking trails")
418
+ if "adventure" in profile.preferred_activities:
419
+ activities.append("Try adventure sports and activities")
420
+
421
+ # Add activities based on travel style
422
+ if profile.travel_style == TravelStyle.LUXURY:
423
+ activities.append("Book private guided tours")
424
+ elif profile.travel_style == TravelStyle.BUDGET:
425
+ activities.append("Explore local markets and street food")
426
+
427
+ return list(set(activities))[:5] # Return top 5 unique activities
428
+ except Exception as e:
429
+ logger.error(
430
+ f"Error getting activity recommendations: {str(e)}", exc_info=True
431
+ )
432
+ return []
433
+
434
+ def _get_personalized_tips(self, profile: UserPreferences) -> List[str]:
435
+ """Get personalized travel tips based on preferences."""
436
+ try:
437
+ tips = []
438
+
439
+ # Add tips based on travel style
440
+ if profile.travel_style == TravelStyle.BUDGET:
441
+ tips.append(
442
+ "Look for local markets and street food for affordable meals"
443
+ )
444
+ tips.append("Consider staying in hostels or guesthouses")
445
+ elif profile.travel_style == TravelStyle.LUXURY:
446
+ tips.append("Book premium experiences and private tours in advance")
447
+ tips.append("Consider luxury resorts and boutique hotels")
448
+
449
+ # Add tips based on dietary restrictions
450
+ if profile.dietary_restrictions:
451
+ tips.append(
452
+ f"Research restaurants that accommodate {', '.join(profile.dietary_restrictions)}"
453
+ )
454
+
455
+ # Add tips based on accessibility needs
456
+ if profile.accessibility_needs:
457
+ tips.append(
458
+ f"Research accessibility features for {', '.join(profile.accessibility_needs)}"
459
+ )
460
+
461
+ return list(set(tips))[:5] # Return top 5 unique tips
462
+ except Exception as e:
463
+ logger.error(f"Error getting personalized tips: {str(e)}", exc_info=True)
464
+ return []
data/.gitkeep ADDED
@@ -0,0 +1 @@
1
+
docs/API.md ADDED
@@ -0,0 +1,294 @@
1
+ # TravelMate AI Assistant API Documentation
2
+
3
+ ## Overview
4
+
5
+ The TravelMate AI Assistant API provides a comprehensive set of endpoints for interacting with an AI-powered travel assistant. The API uses RAG (Retrieval-Augmented Generation) to provide accurate and contextually relevant travel information.
6
+
7
+ ## Base URL
8
+
9
+ ```
10
+ https://api.travelmate.ai/v1
11
+ ```
12
+
13
+ ## Authentication
14
+
15
+ All API endpoints require authentication using JWT (JSON Web Tokens). Include the token in the Authorization header:
16
+
17
+ ```
18
+ Authorization: Bearer <your_token>
19
+ ```
20
+
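+ For example, a minimal authenticated request in Python might look like the sketch below. The token value is a placeholder, and how tokens are issued is outside the scope of this document.
+
+ ```python
+ import requests
+
+ TOKEN = "<your_token>"  # placeholder JWT obtained out of band
+
+ response = requests.post(
+     "https://api.travelmate.ai/v1/chat",
+     headers={
+         "Authorization": f"Bearer {TOKEN}",
+         "Content-Type": "application/json",
+     },
+     json={"message": "What are the best places to visit in Paris?", "chat_history": []},
+ )
+ response.raise_for_status()
+ print(response.json()["answer"])
+ ```
+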
21
+ ## Rate Limiting
22
+
23
+ The API implements rate limiting to ensure fair usage (a client-side handling sketch follows this list):
24
+ - 100 requests per hour per user
25
+ - Rate limit headers are included in responses:
26
+ - `X-RateLimit-Limit`: Maximum requests per window
27
+ - `X-RateLimit-Remaining`: Remaining requests in current window
28
+ - `X-RateLimit-Reset`: Time until rate limit resets
29
+
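+ A minimal sketch of honoring these headers on the client side; it assumes `X-RateLimit-Reset` carries the number of seconds until the window resets (adjust if it is an absolute timestamp):
+
+ ```python
+ import time
+
+ import requests
+
+ def post_with_rate_limit(session: requests.Session, url: str, **kwargs) -> requests.Response:
+     """Send a POST request and pause when the current rate-limit window is exhausted."""
+     response = session.post(url, **kwargs)
+     remaining = int(response.headers.get("X-RateLimit-Remaining", "1"))
+     if remaining == 0:
+         # Assumption: the reset header holds seconds until the window resets.
+         wait_seconds = int(response.headers.get("X-RateLimit-Reset", "60"))
+         time.sleep(wait_seconds)
+     return response
+ ```
+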
30
+ ## Endpoints
31
+
32
+ ### Chat
33
+
34
+ #### POST /chat
35
+
36
+ Process a chat message and get an AI-generated response.
37
+
38
+ **Request Body:**
39
+ ```json
40
+ {
41
+ "message": "What are the best places to visit in Paris?",
42
+ "chat_history": [
43
+ {
44
+ "user": "Hello",
45
+ "assistant": "Hi! How can I help you with your travel plans?"
46
+ }
47
+ ]
48
+ }
49
+ ```
50
+
51
+ **Response:**
52
+ ```json
53
+ {
54
+ "answer": "Here are some must-visit places in Paris...",
55
+ "sources": [
56
+ {
57
+ "title": "Paris Travel Guide",
58
+ "url": "https://example.com/paris-guide",
59
+ "relevance_score": 0.95
60
+ }
61
+ ],
62
+ "suggested_questions": [
63
+ "What's the best time to visit the Eiffel Tower?",
64
+ "Are there any hidden gems in Paris?"
65
+ ]
66
+ }
67
+ ```
68
+
69
+ ### User Profile
70
+
71
+ #### GET /profile
72
+
73
+ Get the current user's profile.
74
+
75
+ **Response:**
76
+ ```json
77
+ {
78
+ "user_id": "user_123",
79
+ "preferences": {
80
+ "travel_style": "balanced",
81
+ "preferred_destinations": ["Paris", "Tokyo"],
82
+ "dietary_restrictions": [],
83
+ "accessibility_needs": [],
84
+ "preferred_activities": ["sightseeing", "food"],
85
+ "budget_range": {
86
+ "min": 1000,
87
+ "max": 5000
88
+ },
89
+ "preferred_accommodation": "hotel",
90
+ "preferred_transportation": "flexible",
91
+ "travel_frequency": "occasional",
92
+ "preferred_seasons": ["spring", "fall"],
93
+ "special_requirements": []
94
+ }
95
+ }
96
+ ```
97
+
98
+ #### PUT /profile/preferences
99
+
100
+ Update user preferences.
101
+
102
+ **Request Body:**
103
+ ```json
104
+ {
105
+ "travel_style": "luxury",
106
+ "preferred_destinations": ["Paris", "Tokyo", "New York"],
107
+ "budget_range": {
108
+ "min": 2000,
109
+ "max": 10000
110
+ }
111
+ }
112
+ ```
113
+
114
+ **Response:**
115
+ ```json
116
+ {
117
+ "message": "Preferences updated successfully"
118
+ }
119
+ ```
120
+
121
+ ### Health Check
122
+
123
+ #### GET /health
124
+
125
+ Check the health status of the API and its components.
126
+
127
+ **Response:**
128
+ ```json
129
+ {
130
+ "status": "healthy",
131
+ "timestamp": "2024-02-20T12:00:00Z",
132
+ "version": "1.0.0",
133
+ "environment": "production",
134
+ "components": {
135
+ "rag_engine": "ok",
136
+ "user_profile": "ok"
137
+ }
138
+ }
139
+ ```
140
+
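+ A simple readiness probe built on this endpoint might look like the sketch below (the bearer token and timeout are placeholders):
+
+ ```python
+ import requests
+
+ TOKEN = "<your_token>"  # placeholder JWT
+
+ health = requests.get(
+     "https://api.travelmate.ai/v1/health",
+     headers={"Authorization": f"Bearer {TOKEN}"},
+     timeout=10,
+ ).json()
+
+ if health["status"] != "healthy" or any(v != "ok" for v in health["components"].values()):
+     raise RuntimeError(f"TravelMate API degraded: {health}")
+ ```
+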
141
+ ## Error Handling
142
+
143
+ The API uses standard HTTP status codes and returns error responses in the following format:
144
+
145
+ ```json
146
+ {
147
+ "error": "Error type",
148
+ "detail": "Detailed error message",
149
+ "timestamp": "2024-02-20T12:00:00Z",
150
+ "request_id": "req_123"
151
+ }
152
+ ```
153
+
154
+ Common error codes (a retry sketch for rate-limit and transient server errors follows this list):
155
+ - 400: Bad Request
156
+ - 401: Unauthorized
157
+ - 403: Forbidden
158
+ - 404: Not Found
159
+ - 429: Too Many Requests
160
+ - 500: Internal Server Error
161
+ - 503: Service Unavailable
162
+
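+ A minimal retry sketch for 429 and transient server errors, complementing the retry guidance in the Best Practices section below (the attempt count and backoff base are illustrative, not API requirements):
+
+ ```python
+ import time
+
+ import requests
+
+ def post_with_retries(url: str, payload: dict, headers: dict, max_attempts: int = 5) -> dict:
+     """Retry on 429/5xx responses with exponential backoff (1s, 2s, 4s, ...)."""
+     for attempt in range(max_attempts):
+         response = requests.post(url, json=payload, headers=headers)
+         if response.status_code not in (429, 500, 503):
+             response.raise_for_status()  # surface 4xx errors other than 429
+             return response.json()
+         time.sleep(2 ** attempt)
+     raise RuntimeError("Giving up after repeated rate-limit or server errors")
+ ```
+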
163
+ ## Best Practices
164
+
165
+ 1. **Error Handling**
166
+ - Always check response status codes
167
+ - Implement exponential backoff for retries
168
+ - Handle rate limiting gracefully
169
+
170
+ 2. **Performance**
171
+ - Cache responses when appropriate (see the caching sketch after this list)
172
+ - Minimize chat history size
173
+ - Use compression for large requests
174
+
175
+ 3. **Security**
176
+ - Keep tokens secure
177
+ - Use HTTPS for all requests
178
+ - Validate all input data
179
+
180
+ 4. **Rate Limiting**
181
+ - Monitor rate limit headers
182
+ - Implement request queuing
183
+ - Handle 429 responses appropriately
184
+
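+ A caching sketch for repeated identical questions, using `cachetools` (already a dependency of this project); the client object and the TTL/size values are illustrative assumptions:
+
+ ```python
+ from cachetools import TTLCache
+
+ # Illustrative limits: 1,000 entries kept for one hour.
+ _chat_cache: TTLCache = TTLCache(maxsize=1000, ttl=3600)
+
+ def cached_chat(client, message: str) -> dict:
+     """Return a cached answer for repeated identical questions.
+
+     `client` is any object with a chat(message) method, e.g. the Python
+     client shown in the SDKs section below.
+     """
+     if message not in _chat_cache:
+         _chat_cache[message] = client.chat(message)
+     return _chat_cache[message]
+ ```
+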
185
+ ## SDKs and Examples
186
+
187
+ ### Python
188
+
189
+ ```python
190
+ import requests
191
+
192
+ class TravelMateClient:
193
+ def __init__(self, api_key, base_url="https://api.travelmate.ai/v1"):
194
+ self.api_key = api_key
195
+ self.base_url = base_url
196
+ self.session = requests.Session()
197
+ self.session.headers.update({
198
+ "Authorization": f"Bearer {api_key}",
199
+ "Content-Type": "application/json"
200
+ })
201
+
202
+ def chat(self, message, chat_history=None):
203
+ response = self.session.post(
204
+ f"{self.base_url}/chat",
205
+ json={
206
+ "message": message,
207
+ "chat_history": chat_history or []
208
+ }
209
+ )
210
+ response.raise_for_status()
211
+ return response.json()
212
+
213
+ def get_profile(self):
214
+ response = self.session.get(f"{self.base_url}/profile")
215
+ response.raise_for_status()
216
+ return response.json()
217
+
218
+ def update_preferences(self, preferences):
219
+ response = self.session.put(
220
+ f"{self.base_url}/profile/preferences",
221
+ json=preferences
222
+ )
223
+ response.raise_for_status()
224
+ return response.json()
225
+ ```
226
+
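+ A brief usage sketch of the client above (the API key is a placeholder; the payloads mirror the endpoint examples earlier in this document):
+
+ ```python
+ client = TravelMateClient(api_key="<your_api_key>")
+
+ reply = client.chat("What are the best places to visit in Paris?")
+ print(reply["answer"])
+ for question in reply.get("suggested_questions", []):
+     print("-", question)
+
+ client.update_preferences({
+     "travel_style": "luxury",
+     "budget_range": {"min": 2000, "max": 10000},
+ })
+ ```
+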
227
+ ### JavaScript
228
+
229
+ ```javascript
230
+ class TravelMateClient {
231
+ constructor(apiKey, baseUrl = 'https://api.travelmate.ai/v1') {
232
+ this.apiKey = apiKey;
233
+ this.baseUrl = baseUrl;
234
+ }
235
+
236
+ async chat(message, chatHistory = []) {
237
+ const response = await fetch(`${this.baseUrl}/chat`, {
238
+ method: 'POST',
239
+ headers: {
240
+ 'Authorization': `Bearer ${this.apiKey}`,
241
+ 'Content-Type': 'application/json'
242
+ },
243
+ body: JSON.stringify({
244
+ message,
245
+ chat_history: chatHistory
246
+ })
247
+ });
248
+
249
+ if (!response.ok) {
250
+ throw new Error(`API error: ${response.statusText}`);
251
+ }
252
+
253
+ return response.json();
254
+ }
255
+
256
+ async getProfile() {
257
+ const response = await fetch(`${this.baseUrl}/profile`, {
258
+ headers: {
259
+ 'Authorization': `Bearer ${this.apiKey}`
260
+ }
261
+ });
262
+
263
+ if (!response.ok) {
264
+ throw new Error(`API error: ${response.statusText}`);
265
+ }
266
+
267
+ return response.json();
268
+ }
269
+
270
+ async updatePreferences(preferences) {
271
+ const response = await fetch(`${this.baseUrl}/profile/preferences`, {
272
+ method: 'PUT',
273
+ headers: {
274
+ 'Authorization': `Bearer ${this.apiKey}`,
275
+ 'Content-Type': 'application/json'
276
+ },
277
+ body: JSON.stringify(preferences)
278
+ });
279
+
280
+ if (!response.ok) {
281
+ throw new Error(`API error: ${response.statusText}`);
282
+ }
283
+
284
+ return response.json();
285
+ }
286
+ }
287
+ ```
288
+
289
+ ## Support
290
+
291
+ For API support, please contact:
292
+ - Email: [email protected]
293
+ - Documentation: https://docs.travelmate.ai
294
+ - Status Page: https://status.travelmate.ai
huggingface.yaml ADDED
@@ -0,0 +1,113 @@
1
+ sdk: gradio
2
+ sdk_version: 4.19.2
3
+ app_file: app.py
4
+ python_version: "3.10"
5
+
6
+ # Hardware requirements
7
+ hardware:
8
+ cpu: 2
9
+ memory: 16GB
10
+
11
+ # Build settings
12
+ build:
13
+ cuda: "None" # No CUDA needed for CPU-only
14
+ system_packages:
15
+ - build-essential
16
+ - python3-dev
17
+ - cmake
18
+ - pkg-config
19
+ - libopenblas-dev
20
+ - libomp-dev
21
+
22
+ # Environment variables
23
+ env:
24
+ - MODEL_NAME=meta-llama/Llama-2-7b-chat-hf
25
+ - EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
26
+ - SECRET_KEY=${SECRET_KEY}
27
+ - JWT_SECRET_KEY=${JWT_SECRET_KEY}
28
+ - RATE_LIMIT_REQUESTS=100
29
+ - RATE_LIMIT_WINDOW=3600
30
+ - LOG_LEVEL=INFO
31
+
32
+ # Dependencies
33
+ dependencies:
34
+ - gradio==4.19.2
35
+ - langchain==0.1.9
36
+ - langchain-core>=0.1.52,<0.2
37
+ - langchain-community==0.0.27
38
+ - langchain-text-splitters==0.0.1
39
+ - langchain-huggingface==0.0.3
40
+ - transformers==4.38.2
41
+ - torch==2.2.1
42
+ - accelerate==0.27.2
43
+ - bitsandbytes==0.42.0
44
+ - safetensors==0.4.2
45
+ - sentence-transformers==2.6.1
46
+ - faiss-cpu==1.7.4
47
+ - pydantic==2.5.3
48
+ - pydantic-settings==2.1.0
49
+ - python-dotenv==1.0.0
50
+ - fastapi==0.109.2
51
+ - uvicorn==0.27.1
52
+ - python-jose==3.3.0
53
+ - passlib==1.7.4
54
+ - python-multipart
55
+ - bcrypt==4.1.2
56
+ - httpx==0.26.0
57
+ - aiohttp==3.9.5
58
+ - tenacity==8.2.3
59
+ - cachetools==5.3.2
60
+ - numpy==1.26.3
61
+ - tqdm==4.66.1
62
+ - loguru==0.7.2
63
+ - datasets==2.16.1
64
+ - huggingface-hub==0.24.1
65
+ - circuitbreaker==1.4.0
66
+
67
+ # Health check
68
+ health_check:
69
+ path: /health
70
+ interval: 300
71
+ timeout: 10
72
+ retries: 3
73
+
74
+ # Resource limits
75
+ resources:
76
+ cpu: 2
77
+ memory: 16GB
78
+
79
+ # Cache settings
80
+ cache:
81
+ enabled: true
82
+ ttl: 3600
83
+ max_size: 1000
84
+
85
+ # Logging
86
+ logging:
87
+ level: INFO
88
+ format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
89
+ handlers:
90
+ - type: file
91
+ filename: app.log
92
+ max_bytes: 10485760
93
+ backup_count: 5
94
+ - type: stream
95
+ stream: ext://sys.stdout
96
+
97
+ # Space settings
98
+ space:
99
+ title: "TravelMate - AI Travel Assistant"
100
+ description: "An AI-powered travel assistant using Llama-2 and RAG to help plan trips and provide travel information"
101
+ license: mit
102
+ sdk: gradio
103
+ app_port: 7860
104
+ app_url: "https://huggingface.co/spaces/bharadwaj-m/TravelMate-AI"
105
+
106
+ # Build commands (note: a `build:` mapping is already defined above; duplicate top-level keys are not valid YAML, so many parsers will keep only one of these sections)
107
+ build:
108
+ - pip install -r requirements.txt
109
+ - mkdir -p data/vector_store data/user_profiles data/cache
110
+ - python -c "from core.data_loader import DataLoader; DataLoader().initialize_knowledge_base()"
111
+
112
+ # Run command
113
+ run: python app.py
requirements.txt ADDED
@@ -0,0 +1,44 @@
1
+ # Core frameworks and UI
2
+ gradio==4.19.2
3
+ fastapi==0.109.2
4
+ uvicorn==0.27.1
5
+
6
+ # LangChain and ecosystem (aligned versions)
7
+ langchain==0.1.9
8
+ langchain-core>=0.1.52,<0.2
9
+ langchain-community==0.0.27
10
+ langchain-text-splitters==0.0.1
11
+ langchain-huggingface==0.0.3
12
+
13
+ # LLM and Transformers
14
+ transformers==4.41.2
15
+ torch==2.2.1
16
+ accelerate==0.27.2
17
+
18
+ # Embeddings / similarity search
19
+ sentence-transformers==2.6.1
20
+ faiss-cpu==1.7.4
21
+
22
+ datasets==2.16.1
23
+
24
+ # Security and auth
25
+ python-jose==3.3.0
26
+ passlib==1.7.4
27
+ bcrypt==4.1.2
28
+ python-multipart
29
+
30
+ # Data & utils
31
+ pydantic==2.5.3
32
+ pydantic-settings==2.1.0
33
+ python-dotenv==1.0.0
34
+ httpx==0.26.0
35
+ aiohttp==3.9.5
36
+ tenacity==8.2.3
37
+ expiringdict==1.2.1
38
+ numpy==1.26.3
39
+ tqdm==4.66.1
40
+ loguru==0.7.2
41
+ huggingface-hub==0.24.1
42
+
43
+ # Resilience / patterns
44
+ circuitbreaker==1.4.0