- sections:
  - local: index
    title: TRL
  - local: quickstart
    title: Quickstart
  - local: installation
    title: Installation
  - local: how_to_train
    title: PPO Training FAQ
  - local: use_model
    title: Use Trained Models
  - local: customization
    title: Customize the Training
  - local: logging
    title: Understanding Logs
  title: Get started
- sections:
  - local: models
    title: Model Classes
  - local: trainer
    title: Trainer Classes
  - local: reward_trainer
    title: Reward Model Training
  - local: sft_trainer
    title: Supervised Fine-Tuning
  - local: ppo_trainer
    title: PPO Trainer
  - local: best_of_n
    title: Best of N Sampling
  - local: dpo_trainer
    title: DPO Trainer
  - local: ddpo_trainer
    title: Denoising Diffusion Policy Optimization
  - local: iterative_sft_trainer
    title: Iterative Supervised Fine-Tuning
  - local: text_environments
    title: Text Environments
  title: API
- sections:
  - local: example_overview
    title: Example Overview
  - local: sentiment_tuning
    title: Sentiment Tuning
  - local: lora_tuning_peft
    title: Training with PEFT
  - local: detoxifying_a_lm
    title: Detoxifying a Language Model
  - local: using_llama_models
    title: Training StackLlama
  - local: learning_tools
    title: Learning to Use Tools
  - local: multi_adapter_rl
    title: Multi Adapter RLHF
  title: Examples