Jonhue's workspace
Runs
48
Name
48 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
actor_rollout_ref.actor.checkpoint.contents
actor_rollout_ref.actor.clip_ratio
actor_rollout_ref.actor.clip_ratio_c
actor_rollout_ref.actor.clip_ratio_high
actor_rollout_ref.actor.clip_ratio_low
actor_rollout_ref.actor.entropy_coeff
actor_rollout_ref.actor.fsdp_config.fsdp_size
actor_rollout_ref.actor.fsdp_config.optimizer_offload
actor_rollout_ref.actor.fsdp_config.param_offload
actor_rollout_ref.actor.fsdp_config.wrap_policy.min_num_params
actor_rollout_ref.actor.grad_clip
actor_rollout_ref.actor.kl_loss_coef
actor_rollout_ref.actor.kl_loss_type
actor_rollout_ref.actor.loss_agg_mode
actor_rollout_ref.actor.optim.lr
actor_rollout_ref.actor.optim.lr_warmup_steps
actor_rollout_ref.actor.optim.lr_warmup_steps_ratio
actor_rollout_ref.actor.optim.total_training_steps
actor_rollout_ref.actor.optim.warmup_style
actor_rollout_ref.actor.optim.weight_decay
actor_rollout_ref.actor.ppo_epochs
actor_rollout_ref.actor.ppo_max_token_len_per_gpu
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu
actor_rollout_ref.actor.ppo_mini_batch_size
actor_rollout_ref.actor.shuffle
actor_rollout_ref.actor.strategy
actor_rollout_ref.actor.ulysses_sequence_parallel_size
actor_rollout_ref.actor.use_dynamic_bsz
actor_rollout_ref.actor.use_kl_loss
actor_rollout_ref.actor.use_torch_compile
actor_rollout_ref.hybrid_engine
actor_rollout_ref.model.enable_gradient_checkpointing
actor_rollout_ref.model.path
actor_rollout_ref.model.use_liger
actor_rollout_ref.model.use_remove_padding
actor_rollout_ref.ref.fsdp_config.param_offload
actor_rollout_ref.ref.fsdp_config.wrap_policy.min_num_params
actor_rollout_ref.ref.log_prob_max_token_len_per_gpu
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu
actor_rollout_ref.ref.log_prob_use_dynamic_bsz
actor_rollout_ref.ref.strategy
actor_rollout_ref.ref.ulysses_sequence_parallel_size
actor_rollout_ref.rollout.disable_log_stats
actor_rollout_ref.rollout.do_sample
Finished
-
jonhue
10s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
2m 16s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
9s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
2m 16s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
9s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
10s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
7s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
2m 7s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
7s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
54s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
58s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
1m 7s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
4s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
5s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
6s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
8s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
6s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
6s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
6s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
Finished
-
jonhue
5s
-
["model","optimizer","extra"]
0.2
3
0.28
0.2
0
-1
false
false
0
1
0.001
low_var_kl
token-mean
0.000001
-1
0
250
constant
0.01
1
10240
2
1
false
fsdp
1
false
false
true
true
true
Qwen/Qwen3-8B
false
true
true
0
10240
2
false
fsdp
1
true
true
1-20
of 48