Alexisrozhkov's workspace
Runs
23
Runtime
Hostname
Notes
State
Tags
trl_ppo_trainer_config.learning_rate
trl_ppo_trainer_config.model_name
trl_ppo_trainer_config.optimize_cuda_cache
trl_ppo_trainer_config.optimize_device_cache
trl_ppo_trainer_config.score_clip
trl_ppo_trainer_config.use_score_norm
trl_ppo_trainer_config.use_score_scaling
env/reward_mean
env/reward_std
mean_confidence
mean_score
objective/entropy
objective/kl
objective/kl_coef
pearsonr
ppo/learning_rate
ppo/loss/policy
ppo/loss/total
ppo/loss/value
ppo/mean_non_score_reward
ppo/mean_scores
ppo/policy/advantages_mean
ppo/policy/approxkl
ppo/policy/clipfrac
ppo/policy/entropy
ppo/policy/policykl
ppo/returns/mean
ppo/returns/var
ppo/std_scores
ppo/val/clipfrac
ppo/val/error
ppo/val/mean
ppo/val/var
ppo/val/var_explained
ppo/val/vpred
time/ppo/calc_stats
time/ppo/compute_advantages
time/ppo/compute_rewards
time/ppo/forward_pass
18h 28m 12s
-
Killed
0.0000141
triviaqa-sft-balanced10x1000-06.04-17-score_conf_modulation
true
false
-
false
false
0.24118
0.47425
0.45703
0.48831
5.10977
1.1938
0.05784
0.67805
0.0000141
-0.021981
-0.017004
0.049765
-0.0060248
0.24118
0.0037937
0.0091852
0.049537
1.48202
0.019826
0.19152
0.13285
0.47425
0.0015965
0.099451
0.20519
0.073979
0.25141
0.19469
0.034012
0.0062637
0.021263
8.7265
11h 56m 33s
-
Killed
0.0000141
triviaqa-sft-balanced10x1000-06.03-23-score_lambda1.0-nocalib
true
false
-
false
false
0.48068
0.38259
0.49531
0.52534
3.50041
0.43404
0.091048
0.44182
0.0000141
-0.014989
-0.01008
0.049085
-0.0033881
0.48068
0.0075972
0.0022937
0.012982
0.48471
0.0026332
0.47003
0.084117
0.38259
0
0.098169
0.48567
0.05662
-0.16705
0.4664
0.035064
0.0058305
0.021068
8.74496
12h 1m 42s
-
Killed
0.0000141
triviaqa-sft-balanced10x1000-06.03-10-score_lambda0.3
true
false
-
false
false
0.0047285
0.21373
0.48203
0.41566
2.80404
0.50608
0.091048
0.57917
0.0000141
-0.0078296
-0.0058392
0.019904
-0.0039137
0.0047285
0.00083538
0.0016034
0.012627
0.35026
0.0027671
-0.012623
0.022604
0.21373
0
0.039808
0.0053349
0.022757
-0.76113
-0.008562
0.033308
0.0060499
0.021509
8.75248
11h 57m 33s
-
Killed
0.0000141
triviaqa-sft-balanced10x1000-06.02-22
true
false
-
false
false
-0.080162
0.14858
0.43984
0.39124
2.95545
0.59466
0.091516
0.52803
0.0000141
-0.0085214
-0.0067543
0.017671
-0.0044368
-0.080162
-0.0009451
0.0022238
0.014571
0.38254
0.0060705
-0.11015
0.011067
0.14858
0
0.035342
-0.10855
0.024156
-2.19334
-0.11262
0.034302
0.0066423
0.022061
8.8503
1-4
of 4