<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Trl on AI Knowledge Base</title><link>https://learn-ai.blindshot.kz/source/trl/</link><description>Recent content in Trl on AI Knowledge Base</description><generator>Hugo</generator><language>en-us</language><atom:link href="https://learn-ai.blindshot.kz/source/trl/index.xml" rel="self" type="application/rss+xml"/><item><title>Asynchronous GRPO</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/async_grpo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/async_grpo_trainer/</guid><description/></item><item><title>BCO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/bco_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/bco_trainer/</guid><description/></item><item><title>BEMA for Reference Model</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/bema_for_reference_model/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/bema_for_reference_model/</guid><description/></item><item><title>Callbacks</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/callbacks/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/callbacks/</guid><description/></item><item><title>Chat template utilities</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/chat_template_utils/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/chat_template_utils/</guid><description/></item><item><title>Chat Templates</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/chat_templates/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/chat_templates/</guid><description/></item><item><title>Command Line Interfaces (CLIs)</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/clis/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/clis/</guid><description/></item><item><title>Community Tutorials</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/community_tutorials/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/community_tutorials/</guid><description/></item><item><title>CPO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/cpo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/cpo_trainer/</guid><description/></item><item><title>Data Utilities</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/data_utils/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/data_utils/</guid><description/></item><item><title>Dataset formats and types</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/dataset_formats/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/dataset_formats/</guid><description/></item><item><title>DeepSpeed Integration</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/deepspeed_integration/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/deepspeed_integration/</guid><description/></item><item><title>Distillation Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/distillation_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/distillation_trainer/</guid><description/></item><item><title>Distributing Training</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/distributing_training/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/distributing_training/</guid><description/></item><item><title>DPO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/dpo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/dpo_trainer/</guid><description/></item><item><title>Examples</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/example_overview/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/example_overview/</guid><description/></item><item><title>Experimental</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/experimental_overview/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/experimental_overview/</guid><description/></item><item><title>General Online Logit Distillation (GOLD) Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/gold_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/gold_trainer/</guid><description/></item><item><title>Generalized Knowledge Distillation Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/gkd_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/gkd_trainer/</guid><description/></item><item><title>GFPO</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/gfpo/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/gfpo/</guid><description/></item><item><title>GRPO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/grpo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/grpo_trainer/</guid><description/></item><item><title>GRPO With Replay Buffer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/grpo_with_replay_buffer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/grpo_with_replay_buffer/</guid><description/></item><item><title>GSPO-token</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/gspo_token/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/gspo_token/</guid><description/></item><item><title>Installation</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/installation/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/installation/</guid><description/></item><item><title>Kernels Hub Integration and Usage</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/kernels_hub/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/kernels_hub/</guid><description/></item><item><title>KTO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/kto_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/kto_trainer/</guid><description/></item><item><title>Liger Kernel Integration</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/liger_kernel_integration/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/liger_kernel_integration/</guid><description/></item><item><title>LoRA Without Regret</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/lora_without_regret/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/lora_without_regret/</guid><description/></item><item><title>MiniLLM Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/minillm_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/minillm_trainer/</guid><description/></item><item><title>Nash-MD Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/nash_md_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/nash_md_trainer/</guid><description/></item><item><title>NeMo Gym Integration</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/nemo_gym/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/nemo_gym/</guid><description/></item><item><title>Online DPO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/online_dpo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/online_dpo_trainer/</guid><description/></item><item><title>OpenEnv Integration for Training LLMs with Environments</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/openenv/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/openenv/</guid><description/></item><item><title>OpenReward Integration for Training LLMs with Environments</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/openreward/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/openreward/</guid><description/></item><item><title>ORPO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/orpo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/orpo_trainer/</guid><description/></item><item><title>Paper Index</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/paper_index/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/paper_index/</guid><description/></item><item><title>PAPO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/papo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/papo_trainer/</guid><description/></item><item><title>PEFT Integration</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/peft_integration/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/peft_integration/</guid><description/></item><item><title>Post-Training Toolkit Integration</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/ptt_integration/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/ptt_integration/</guid><description/></item><item><title>PPO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/ppo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/ppo_trainer/</guid><description/></item><item><title>PRM Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/prm_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/prm_trainer/</guid><description/></item><item><title>Quickstart</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/quickstart/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/quickstart/</guid><description/></item><item><title>RapidFire AI Integration</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/rapidfire_integration/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/rapidfire_integration/</guid><description/></item><item><title>Reducing Memory Usage</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/reducing_memory_usage/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/reducing_memory_usage/</guid><description/></item><item><title>Reward Functions</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/rewards/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/rewards/</guid><description/></item><item><title>Reward Modeling</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/reward_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/reward_trainer/</guid><description/></item><item><title>RLOO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/rloo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/rloo_trainer/</guid><description/></item><item><title>Scripts Utilities</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/script_utils/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/script_utils/</guid><description/></item><item><title>SDFT</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/sdft_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/sdft_trainer/</guid><description/></item><item><title>SDPO</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/sdpo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/sdpo_trainer/</guid><description/></item><item><title>SFT Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/sft_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/sft_trainer/</guid><description>&lt;p&gt;Supervised fine-tuning is the simplest and most common way to adapt a model to your data, and the SFTTrainer is where most TRL users begin. Pay close attention to dataset format: it accepts both language-modeling and prompt-completion shapes and auto-applies the chat template for conversational data, so mismatched formats are the most common source of silent quality loss. Two gotchas worth remembering are that completion-only loss is on by default for prompt-completion datasets, and that training adapters via PEFT usually wants a higher learning rate near 1e-4. Read the TRL overview first, and pair this with the PEFT LoRA guide when you train adapters.&lt;/p&gt;</description></item><item><title>Speeding Up Training</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/speeding_up_training/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/speeding_up_training/</guid><description/></item><item><title>SSD</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/ssd_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/ssd_trainer/</guid><description/></item><item><title>TPO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/tpo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/tpo_trainer/</guid><description/></item><item><title>Trackio Integration</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/trackio_integration/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/trackio_integration/</guid><description/></item><item><title>Training customization</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/customization/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/customization/</guid><description/></item><item><title>Training with Jobs</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/jobs_training/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/jobs_training/</guid><description/></item><item><title>TRL - Transformers Reinforcement Learning</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/_overview/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/_overview/</guid><description>&lt;p&gt;This overview maps the whole TRL post-training stack — SFT, reward modeling, DPO, PPO, GRPO, and more — so it matters as the decision page for which trainer fits your alignment goal. Focus on the taxonomy of online versus offline methods, since that split drives compute cost and data requirements more than any single hyperparameter. TRL integrates tightly with Transformers and PEFT, so you can train adapters rather than full models. Start here, then go to the SFT trainer, the most common starting point for instruction tuning.&lt;/p&gt;</description></item><item><title>Unsloth Integration</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/unsloth_integration/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/unsloth_integration/</guid><description/></item><item><title>Usage Stats Collection</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/usage_stats/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/usage_stats/</guid><description/></item><item><title>Use model after training</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/use_model/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/use_model/</guid><description/></item><item><title>vLLM Integration</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/vllm_integration/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/vllm_integration/</guid><description/></item><item><title>XPO Trainer</title><link>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/xpo_trainer/</link><pubDate>Mon, 01 Jan 0001 00:00:00 +0000</pubDate><guid>https://learn-ai.blindshot.kz/docs/trl/v1.5.1/xpo_trainer/</guid><description/></item></channel></rss>