# API keys, one per provider named in the key. Every value is an empty
# placeholder in this file.
# NOTE(review): presumably the real keys are supplied at runtime (for example
# from environment variables or a secret store) - confirm, and do not commit
# real keys here.
api_keys:
  gemini_api_key: ""
  openai_api_key: ""
  pinecone_api_key: ""
# Backup settings. Backups are switched off in this file (enabled: false).
backup:
  enabled: false
  # The include_* flags select which kinds of data a backup covers; logs are
  # the only category excluded here.
  include_configuration: true
  include_documents: true
  include_logs: false
  include_vector_db: true
  # Units come from the key names: hours for the interval, days for retention.
  interval_hours: 24
  retention_days: 30
  # Relative path.
  # NOTE(review): presumably resolved against the process working directory -
  # confirm.
  storage_path: backups/
# Branding and text overrides. Every string override is empty in this file;
# only the example queries are populated.
# NOTE(review): presumably an empty string means "use the built-in default" -
# confirm against the code that reads this section.
customization:
  custom_css: ""
  # Three example questions, phrased generically about "the uploaded
  # documents".
  default_query_examples:
    - What is the main topic of the uploaded documents?
    - Can you summarize the key points?
    - What are the important findings mentioned?
  favicon_url: ""
  footer_text: ""
  help_text: ""
  logo_url: ""
  welcome_message: ""
# Deployment settings: platform, resource ceilings, health/metrics endpoints
# and three per-environment profiles (development, production, staging).
deployment:
  auto_scale: true
  # Per-environment profile. Each profile sets the same three keys.
  # Note: this is distinct from the top-level `development` section, which has
  # its own debug_mode (false there, true here).
  # NOTE(review): confirm which of the two the application actually reads.
  development:
    debug_mode: true
    enable_profiling: true
    log_level: DEBUG
  enable_metrics: true
  # NOTE(review): no unit in the key names; presumably seconds - confirm.
  graceful_shutdown_timeout: 30
  health_check_interval: 60
  health_endpoint: /health
  # Resource ceilings. Units are in the key names (percent, MB).
  # max_memory_mb (2048) differs from performance.max_memory_usage_mb (1024).
  max_cpu_percent: 80
  max_disk_usage_mb: 5120
  max_memory_mb: 2048
  metrics_endpoint: /metrics
  platform: huggingface
  # Production profile: debugging and profiling off, least verbose log level
  # of the three profiles.
  production:
    debug_mode: false
    enable_profiling: false
    log_level: WARNING
  # Staging profile: same flags as development but with INFO logging.
  staging:
    debug_mode: true
    enable_profiling: true
    log_level: INFO
# Developer/testing switches. Every flag is off in this file.
# Note: `deployment.development` is a separate mapping with its own
# debug_mode and enable_profiling values (both true there); the key here is
# spelled profiling_enabled rather than enable_profiling.
# NOTE(review): confirm how the two sections interact.
development:
  debug_mode: false
  enable_test_endpoints: false
  mock_apis: false
  profiling_enabled: false
  save_intermediate_results: false
  # Relative path to test fixtures.
  test_data_path: data/test_data
  test_mode: false
# Settings for ingesting uploaded documents: chunking, extraction options,
# and the accepted file extensions and language codes.
document_processing:
  # Chunking parameters. overlap (200) < chunk_size (1000), and
  # min_chunk_size (100) is below both.
  # NOTE(review): the unit (characters vs. tokens) is not stated - confirm.
  chunk_overlap: 200
  chunk_size: 1000
  detect_language: true
  extract_images: false
  extract_metadata: true
  # Per-file limit in MB. security.max_upload_size_mb is 100, i.e. larger.
  # NOTE(review): confirm which limit is enforced first.
  max_file_size_mb: 50
  min_chunk_size: 100
  preserve_formatting: true
  # File extensions, each written with its leading dot.
  supported_formats:
    - .pdf
    - .docx
    - .doc
    - .csv
    - .xlsx
    - .xls
    - .pptx
    - .txt
    - .md
  # Two-letter language codes.
  supported_languages:
    - en
    - es
    - fr
    - de
    - it
    - pt
    - ru
    - zh
    - ja
    - ko
# Embedding model settings.
embedding:
  # One item per batch.
  # NOTE(review): presumably chosen to stay within provider rate limits
  # together with rate_limit_delay below - confirm.
  batch_size: 1
  cache_embeddings: true
  fallback_model: sentence-transformers
  max_retries: 3
  max_tokens: 8192
  model: gemini-embedding-exp-03-07
  # Same value as vector_db.dimension (3072).
  # NOTE(review): presumably the two must be kept in sync - confirm.
  output_dimensionality: 3072
  # NOTE(review): no unit in the key names; presumably seconds - confirm.
  rate_limit_delay: 1.0
  retry_delay: 2
  task_type: RETRIEVAL_DOCUMENT
  title: ""
# Feature toggles, one boolean per capability. Enabled in this file:
# batch_processing, document_upload, live_search, query_processing and
# url_processing; everything else is off.
# Note: live search also has its own `live_search.enabled` switch, and
# `ui.features` holds a separate set of UI toggles.
# NOTE(review): confirm how those switches combine with the ones here.
features:
  async_processing: false
  audio_processing: false
  auto_summarization: false
  batch_processing: true
  content_recommendation: false
  document_upload: true
  image_processing: false
  live_search: true
  multi_language_support: false
  query_processing: true
  question_generation: false
  real_time_updates: false
  url_processing: true
  video_processing: false
# Optional third-party integrations. Each entry has its own `enabled` flag;
# all are disabled and all credential/identifier fields are empty here.
# NOTE(review): access_key, secret_key, api_key, connection_string and dsn
# are secrets or may embed them - presumably injected at deploy time;
# confirm, and avoid committing real values.
integrations:
  aws_s3:
    access_key: ""
    bucket_name: ""
    enabled: false
    secret_key: ""
  google_analytics:
    enabled: false
    tracking_id: ""
  huggingface:
    api_key: ""
    enabled: false
    models: []
  postgresql:
    connection_string: ""
    enabled: false
  sentry:
    dsn: ""
    enabled: false
# Logging settings: log file, size/backup limits, format, and per-component
# level overrides.
logging:
  backup_count: 5
  # Per-component levels. Every component is INFO, the same as `level` below.
  component_levels:
    document_processing: INFO
    embedding: INFO
    rag: INFO
    ui: INFO
    url_processing: INFO
    vector_db: INFO
  # Relative path to the log file.
  file: logs/rag_ai.log
  # Python logging %-style format string (asctime, name, levelname, message).
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  level: INFO
  # Size limit in MB (unit from the key name).
  # NOTE(review): presumably the rotation threshold, with backup_count
  # rotated files kept - confirm.
  max_file_size_mb: 10
# Notification channels. There is a section-level `enabled` flag plus one per
# channel; all three are false in this file.
notifications:
  email:
    enabled: false
    from_address: ""
    # NOTE(review): SMTP credential - presumably injected at deploy time;
    # confirm, and avoid committing a real password.
    password: ""
    # 587 is the standard mail-submission port.
    smtp_port: 587
    smtp_server: ""
    to_addresses: []
    username: ""
  enabled: false
  webhook:
    enabled: false
    # Event names listed for the webhook.
    events:
      - error
      - system_health
      - processing_complete
    url: ""
# Performance tuning: caching, concurrency and memory limits.
performance:
  batch_processing_size: 10
  # NOTE(review): no unit in the key name; presumably seconds - confirm.
  cache_ttl: 3600
  enable_caching: true
  enable_parallel_processing: true
  # NOTE(review): no unit in the key name; presumably seconds - confirm.
  garbage_collection_interval: 300
  max_concurrent_requests: 5
  # In MB (unit from the key name). deployment.max_memory_mb is 2048.
  # NOTE(review): confirm how the two limits relate.
  max_memory_usage_mb: 1024
  max_worker_threads: 4
  # NOTE(review): no unit in the key name; presumably seconds - confirm.
  request_timeout: 30
# Retrieval-augmented generation settings: retrieval thresholds, result
# post-processing, and generation-model parameters.
rag:
  # Separate from query_router.confidence_threshold, which is 0.5.
  confidence_threshold: 0.3
  context_window_overlap: 0.1
  deduplicate_results: true
  enable_query_caching: true
  enable_query_expansion: true
  # The primary `model` below is a Gemini model while this fallback is an
  # OpenAI model name.
  # NOTE(review): presumably the fallback needs api_keys.openai_api_key -
  # confirm.
  fallback_model: gpt-3.5-turbo
  include_sources: true
  # NOTE(review): units of the two max_*_length keys (characters vs. tokens)
  # and their relationship to max_tokens are not stated - confirm.
  max_context_length: 8000
  max_response_length: 2000
  max_tokens: 500
  model: gemini-2.5-flash-preview-05-20
  # NOTE(review): no unit in the key name; presumably seconds - confirm.
  query_cache_ttl: 7200
  rerank_results: true
  similarity_threshold: 0.4
  # Sampling parameters for the generation model.
  temperature: 0.7
  top_k: 10
  top_p: 0.9
# Live (web) search settings. Enabled in this file; `features.live_search`
# is a second switch for the same capability and is also true.
live_search:
  enabled: true
  enable_caching: true
  include_raw_content: true
  max_results: 10
  # NOTE(review): the accepted values for search_depth and time_range are
  # defined by the search provider and are not visible here - confirm before
  # changing.
  search_depth: basic
  time_range: month
# Query routing / hybrid-search settings.
query_router:
  # Separate from rag.confidence_threshold, which is 0.3.
  confidence_threshold: 0.5
  enable_hybrid_search: true
  # The two weights sum to 1.0 (0.4 + 0.6).
  # NOTE(review): presumably they must keep summing to 1.0 - confirm.
  live_weight: 0.4
  local_weight: 0.6
  max_hybrid_results: 10
# Security settings: domain filters, content filtering, rate limits and
# input size limits.
security:
  # Empty list.
  # NOTE(review): presumably "no allow-list restriction" rather than "allow
  # nothing" - confirm.
  allowed_domains: []
  blocked_content_types:
    - executable
    - script
  # Same three entries as url_processing.blocked_domains.
  # NOTE(review): only these three loopback/unspecified names are listed;
  # other internal targets (e.g. ::1, private IP ranges, 169.254.169.254) are
  # not. Confirm the URL fetcher guards against those elsewhere.
  blocked_domains:
    - localhost
    - 127.0.0.1
    - 0.0.0.0
  enable_content_filtering: true
  enable_rate_limiting: true
  # NOTE(review): unit not stated; presumably characters - confirm.
  max_text_length: 1000000
  # In MB (unit from the key name). document_processing.max_file_size_mb is
  # 50, i.e. smaller.
  # NOTE(review): confirm which limit is enforced first.
  max_upload_size_mb: 100
  # Rate limits; 60 per minute would allow 3600 per hour, so the hourly cap
  # of 1000 is the tighter one over a full hour.
  requests_per_hour: 1000
  requests_per_minute: 60
  sanitize_input: true
# Web UI settings: page text, UI feature toggles, limits and server options.
ui:
  demo_mode: false
  # Folded block scalar (>-): the two lines are joined with a single space
  # and no trailing newline is added, so the value is one line of text.
  description: >-
    Upload documents or provide URLs to build your knowledge base, then
    ask questions!
  # UI-level toggles; all are on in this file. These are separate from the
  # top-level `features` section.
  features:
    analytics_dashboard: true
    confidence_display: true
    file_upload: true
    knowledge_base_management: true
    query_interface: true
    source_display: true
    system_health_monitoring: true
    url_input: true
  max_file_uploads: 10
  max_query_length: 1000
  port: 7860
  sample_documents: []
  # NOTE(review): presumably the bind address (0.0.0.0 = all interfaces) -
  # confirm.
  server_name: 0.0.0.0
  share: false
  show_advanced_options: true
  theme: default
  # \U0001F9E0 is the brain emoji (U+1F9E0). The previous value held the
  # emoji's UTF-8 bytes (F0 9F A7 A0) mis-decoded as Windows-1252 and would
  # render as garbage. The escape keeps this file pure ASCII.
  # NOTE(review): check that whatever reads/writes this file uses UTF-8, or
  # the corruption may come back.
  title: "\U0001F9E0 AI Embedded Knowledge Agent"
# Settings for fetching and crawling URLs.
url_processing:
  # Empty list.
  # NOTE(review): presumably "no allow-list restriction" rather than "allow
  # nothing" - confirm.
  allowed_domains: []
  # Same three entries as security.blocked_domains.
  # NOTE(review): confirm which of the two lists the fetcher consults.
  blocked_domains:
    - localhost
    - 127.0.0.1
    - 0.0.0.0
  # 0.5 matches requests_per_second: 2 below (1 / 2 = 0.5).
  # NOTE(review): presumably seconds - confirm.
  delay_between_requests: 0.5
  extract_main_content: true
  # Link following is on, bounded by max_depth and max_pages below.
  follow_links: true
  max_depth: 1
  max_pages: 10
  remove_ads: true
  remove_navigation: true
  requests_per_second: 2
  respect_robots_txt: true
  # NOTE(review): no unit in the key name; presumably seconds - confirm.
  timeout: 10
  user_agent: RAG-AI-Bot/1.0
# Vector database settings.
vector_db:
  batch_size: 100
  create_index_if_not_exists: true
  # Same value as embedding.output_dimensionality (3072).
  # NOTE(review): presumably the two must be kept in sync - confirm.
  dimension: 3072
  environment: us-east-1
  # NOTE(review): presumably an in-process store used when the primary
  # provider is unavailable - confirm.
  fallback_provider: memory
  index_name: rag-ai-index
  max_retries: 3
  metric: cosine
  provider: pinecone
  # NOTE(review): no unit in the key name; presumably seconds - confirm.
  retry_delay: 1