model_list:
  - model_name: gpt-3.5-turbo-end-user-test
    litellm_params:
      model: gpt-3.5-turbo
      region_name: "eu"
    model_info:
      id: "1"
  - model_name: gpt-3.5-turbo-end-user-test
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
  - model_name: gpt-3.5-turbo-large
    litellm_params:
      model: "gpt-3.5-turbo-1106"
      api_key: os.environ/OPENAI_API_KEY
      rpm: 480
      timeout: 300
      stream_timeout: 60
  - model_name: gpt-4
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
      rpm: 480
      timeout: 300
      stream_timeout: 60
  - model_name: sagemaker-completion-model
    litellm_params:
      model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
      input_cost_per_second: 0.000420
  - model_name: text-embedding-ada-002
    litellm_params:
      model: azure/azure-embedding-model
      api_key: os.environ/AZURE_API_KEY
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
    model_info:
      mode: embedding
      base_model: text-embedding-ada-002
  - model_name: dall-e-2
    litellm_params:
      model: azure/
      api_version: 2023-06-01-preview
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_key: os.environ/AZURE_API_KEY
  - model_name: openai-dall-e-3
    litellm_params:
      model: dall-e-3
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: fake-openai-endpoint-2
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      stream_timeout: 0.001
      rpm: 1
  - model_name: fake-openai-endpoint-3
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      stream_timeout: 0.001
      rpm: 1000
  - model_name: fake-openai-endpoint-4
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      num_retries: 50
  - model_name: fake-openai-endpoint-3
    litellm_params:
      model: openai/my-fake-model-2
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      stream_timeout: 0.001
      rpm: 1000
  - model_name: bad-model
    litellm_params:
      model: openai/bad-model
      api_key: os.environ/OPENAI_API_KEY
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      mock_timeout: True
      timeout: 60
      rpm: 1000
    model_info:
      health_check_timeout: 1
  - model_name: good-model
    litellm_params:
      model: openai/bad-model
      api_key: os.environ/OPENAI_API_KEY
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      rpm: 1000
    model_info:
      health_check_timeout: 1
  - model_name: "*"
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
  # provider specific wildcard routing
  - model_name: "anthropic/*"
    litellm_params:
      model: "anthropic/*"
      api_key: os.environ/ANTHROPIC_API_KEY
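  # The "*" and provider-prefixed wildcard entries route otherwise-unmatched model
  # names to the named provider. As an illustrative sketch only (the model name,
  # port, and key are assumptions, not part of this config), a request like the
  # following would be matched by the "anthropic/*" entry above and forwarded
  # using ANTHROPIC_API_KEY:
  #
  #   curl http://0.0.0.0:4000/v1/chat/completions \
  #     -H "Authorization: Bearer sk-1234" \
  #     -H "Content-Type: application/json" \
  #     -d '{"model": "anthropic/claude-3-haiku-20240307", "messages": [{"role": "user", "content": "hi"}]}'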
  - model_name: "bedrock/*"
    litellm_params:
      model: "bedrock/*"
  - model_name: "groq/*"
    litellm_params:
      model: "groq/*"
      api_key: os.environ/GROQ_API_KEY
  - model_name: mistral-embed
    litellm_params:
      model: mistral/mistral-embed
  - model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model
    litellm_params:
      model: text-completion-openai/gpt-3.5-turbo-instruct
  - model_name: fake-openai-endpoint-5
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      timeout: 1

litellm_settings:
  # set_verbose: True # Uncomment this if you want to see verbose logs; not recommended in production
  drop_params: True
  # max_budget: 100
  # budget_duration: 30d
  num_retries: 5
  request_timeout: 600
  telemetry: False
  context_window_fallbacks: [{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}]
  default_team_settings:
    - team_id: team-1
      success_callback: ["langfuse"]
      failure_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT1_PUBLIC # Project 1
      langfuse_secret: os.environ/LANGFUSE_PROJECT1_SECRET # Project 1
    - team_id: team-2
      success_callback: ["langfuse"]
      failure_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT2_PUBLIC # Project 2
      langfuse_secret: os.environ/LANGFUSE_PROJECT2_SECRET # Project 2
      langfuse_host: https://us.cloud.langfuse.com

# For /fine_tuning/jobs endpoints
finetune_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2023-03-15-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY

# for /files endpoints
files_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2023-03-15-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY

router_settings:
  routing_strategy: usage-based-routing-v2
  redis_host: os.environ/REDIS_HOST
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: os.environ/REDIS_PORT
  enable_pre_call_checks: true
  model_group_alias: {"my-special-fake-model-alias-name": "fake-openai-endpoint-3"}

general_settings:
  master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys
  store_model_in_db: True
  proxy_budget_rescheduler_min_time: 60
  proxy_budget_rescheduler_max_time: 64
  proxy_batch_write_at: 1
  database_connection_pool_limit: 10
  # database_url: "postgresql://:@:/" # [OPTIONAL] use for token-based auth to proxy
  pass_through_endpoints:
    - path: "/v1/rerank"                         # route you want to add to LiteLLM Proxy Server
      target: "https://api.cohere.com/v1/rerank" # URL this route should forward requests to
      headers:                                   # headers to forward to this URL
        content-type: application/json           # (Optional) Extra Headers to pass to this endpoint
        accept: application/json
      forward_headers: True

# environment_variables:
  # settings for using redis caching
  # REDIS_HOST: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
  # REDIS_PORT: "16337"
  # REDIS_PASSWORD:
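# Usage sketch (assumptions, not part of this config: the proxy runs locally on
# the default port 4000, auth uses the master_key above, and request body values
# are illustrative). The model_group_alias in router_settings means a request for
# "my-special-fake-model-alias-name" is served by the "fake-openai-endpoint-3"
# deployment group:
#
#   curl http://0.0.0.0:4000/v1/chat/completions \
#     -H "Authorization: Bearer sk-1234" \
#     -H "Content-Type: application/json" \
#     -d '{"model": "my-special-fake-model-alias-name", "messages": [{"role": "user", "content": "hi"}]}'
#
# The pass_through_endpoint forwards POST /v1/rerank to api.cohere.com; with
# forward_headers: True the client's own Authorization header (here an assumed
# COHERE_API_KEY env var) is passed along to the target:
#
#   curl http://0.0.0.0:4000/v1/rerank \
#     -H "Authorization: bearer $COHERE_API_KEY" \
#     -H "Content-Type: application/json" \
#     -d '{"model": "rerank-english-v3.0", "query": "capital of France?", "documents": ["Paris is the capital of France."]}'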