Mluo's workspace
Runs
5
State
Notes
User
Tags
Created
Runtime
Sweep
_source_run_id
actor_rollout_ref.actor.clip_ratio
actor_rollout_ref.actor.clip_ratio_high
actor_rollout_ref.actor.clip_ratio_low
actor_rollout_ref.actor.entropy_coeff
actor_rollout_ref.actor.fsdp_config.fsdp_size
actor_rollout_ref.actor.fsdp_config.grad_offload
actor_rollout_ref.actor.fsdp_config.optimizer_offload
actor_rollout_ref.actor.fsdp_config.param_offload
actor_rollout_ref.actor.fsdp_config.wrap_policy.min_num_params
actor_rollout_ref.actor.grad_clip
actor_rollout_ref.actor.kl_loss_coef
actor_rollout_ref.actor.kl_loss_type
actor_rollout_ref.actor.optim.lr
actor_rollout_ref.actor.optim.lr_warmup_steps_ratio
actor_rollout_ref.actor.optim.total_training_steps
actor_rollout_ref.actor.optim.warmup_style
actor_rollout_ref.actor.ppo_epochs
actor_rollout_ref.actor.ppo_max_token_len_per_gpu
actor_rollout_ref.actor.ppo_micro_batch_size
actor_rollout_ref.actor.ppo_mini_batch_size
actor_rollout_ref.actor.shuffle
actor_rollout_ref.actor.strategy
actor_rollout_ref.actor.ulysses_sequence_parallel_size
actor_rollout_ref.actor.use_dynamic_bsz
actor_rollout_ref.actor.use_kl_loss
actor_rollout_ref.actor.use_token_level_loss
actor_rollout_ref.hybrid_engine
actor_rollout_ref.model.enable_gradient_checkpointing
actor_rollout_ref.model.path
actor_rollout_ref.model.use_remove_padding
actor_rollout_ref.ref.fsdp_config.param_offload
actor_rollout_ref.ref.fsdp_config.wrap_policy.min_num_params
actor_rollout_ref.ref.log_prob_max_token_len_per_gpu
actor_rollout_ref.ref.log_prob_micro_batch_size
actor_rollout_ref.ref.log_prob_use_dynamic_bsz
actor_rollout_ref.ref.ulysses_sequence_parallel_size
actor_rollout_ref.rollout.async_engine
actor_rollout_ref.rollout.compute_reward
actor_rollout_ref.rollout.disable_log_stats
actor_rollout_ref.rollout.do_sample
actor_rollout_ref.rollout.dtype
actor_rollout_ref.rollout.enable_chunked_prefill
actor_rollout_ref.rollout.enable_tools
Finished
-
mluo
1s
-
rllm/deepscaler/lneoh94r
0.2
-
-
0.0015
-1
-
false
false
0
1
0.0001
low_var_kl
5.0000e-7
0
2130
constant
1
27000
-
32
false
fsdp
1
true
false
-
false
true
checkpoints/deepscaler/deepscaler-code-32k-easy/global_step_320/actor/checkpoint
true
true
0
27000
-
true
1
true
true
true
true
bfloat16
true
-
Finished
-
mluo
1s
-
AxT-dev/deepcoder/a4rjh1cu
0.2
0.28
0.2
0
-1
false
false
false
0
1
0
low_var_kl
0.000001
0
22000
constant
1
24000
16
64
false
fsdp
2
true
false
true
true
true
/data/mluo/checkpoints/deepcoder/14b-16k-grpo-code/actor/global_step_160
true
true
0
24000
128
true
2
-
true
-
true
bfloat16
true
-
Finished
-
mluo
1s
-
AxT-dev/deepcoder/iu8ofnf4
0.2
0.28
0.2
0
-1
false
false
false
0
1
0
low_var_kl
0.000001
0
22000
constant
1
20000
16
64
false
fsdp
1
true
false
true
true
true
checkpoints/deepcoder/14b-16k-grpo-code/actor/global_step_10
true
false
0
20000
128
true
1
-
true
-
true
bfloat16
true
-
Finished
-
mluo
1s
-
AxT-dev/deepcoder/igrku3m2
0.2
-
-
0.001
-1
false
false
false
0
1
0.001
low_var_kl
0.000001
0
22000
constant
1
20000
16
64
false
fsdp
1
true
true
-
true
true
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
true
false
0
20000
128
true
1
-
true
-
true
bfloat16
true
-
Finished
-
mluo
1s
-
AxT-dev/deepcoder/vmj9litd
0.2
-
-
0.001
-1
false
false
false
0
1
0.001
low_var_kl
0.000001
0
22000
constant
1
20000
16
64
false
fsdp
1
true
true
-
true
true
deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
true
false
0
20000
128
true
1
-
true
-
true
bfloat16
true
-
1-5
of 5