Skip to main content

trlx: Repro `bnb` perf after t5 update

Created on January 10 | Last edited on January 10
Example Used:
Config:
# PPO training configuration for the "EleutherAI/gpt-j-6B" model using the
# 8-bit AdamW optimizer ("adamw_8bit_bnb").
# Layout: five top-level sections (train, model, optimizer, scheduler, method),
# each a block mapping indented by two spaces.

# Run-level settings: sequence length, step budget, and the component classes
# selected by name.
train:
  seq_length: 48
  epochs: 10
  total_steps: 80000
  batch_size: 8

  checkpoint_interval: 10000
  eval_interval: 100

  pipeline: "PromptPipeline"
  orchestrator: "PPOOrchestrator"
  trainer: "AcceleratePPOTrainer"
  entity_name: "jon-tow"

# Model and tokenizer identifiers.
model:
  model_path: "EleutherAI/gpt-j-6B"
  tokenizer_path: "gpt2"
  num_layers_unfrozen: 8

optimizer:
  # name: "adamw"
  # 8-bit Optimizer Settings
  name: "adamw_8bit_bnb"

  # Keyword arguments for the optimizer selected by `name` above.
  kwargs:
    lr: 1.4e-5
    betas: [0.9, 0.95]
    eps: 1.0e-8
    weight_decay: 1.0e-6

scheduler:
  name: "cosine_annealing"
  kwargs:
    T_max: 80000  # train.total_steps
    # NOTE(review): eta_min (1.0e-4) is larger than optimizer.kwargs.lr
    # (1.4e-5). If this maps onto a standard cosine-annealing schedule, the
    # learning rate would rise toward eta_min instead of decaying - confirm
    # this is intended.
    eta_min: 1.0e-4

# PPO method hyperparameters.
method:
  name: "ppoconfig"
  num_rollouts: 8
  chunk_size: 8
  ppo_epochs: 4
  init_kl_coef: 0.2
  target: 6
  horizon: 10000
  gamma: 1
  lam: 0.95
  cliprange: 0.2
  cliprange_value: 0.2
  vf_coef: 0.2
  scale_reward: "running"
  ref_mean: null
  ref_std: null
  cliprange_reward: 10
  # Sampling settings for text generation.
  gen_kwargs:
    max_new_tokens: 40
    top_k: 0
    top_p: 0.7
    do_sample: true
    temperature: 1.0


Result


Run set
2




Run set
2



Run set
2



Run set
2



Run set
2



Run set
2



Run set
2



Run set
2



Run set
2



Run set
2