Rx31's workspace
Runs
5
Name
5 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
actor_init_on_gpu
actor_learning_rate
actor_num_gpus_per_node
actor_num_nodes
actor_scheduler
adam_betas
adam_offload
add_think_token
advantage_estimator
apply_chat_template
aux_loss_coef
bf16
ckpt_path
colocate_actor_ref
colocate_critic_reward
critic_learning_rate
critic_num_gpus_per_node
critic_num_nodes
disable_fast_tokenizer
disable_trace_cache
enable_ema
enable_prefix_caching
enforce_eager
entropy_coef
eps_clip
eval_steps
eval_temperature
eval_temperature_at_k
eval_top_p
extra_eval_task_fast
extra_eval_task_fast_supress_orig_eval
extra_eval_trajectory_temperature
filter_samples_by_reward
flash_attn
freezing_actor_steps
gamma
generate_max_len
gradient_checkpointing
gradient_checkpointing_use_reentrant
init_kl_coef
input_key
input_template
l2
label_key
Killed
-
rx31
13h 5m 7s
-
false
5.0000e-7
2
1
constant
[0.9,0.95]
false
0
group_norm
true
0
true
/mmfs1/home/rx31/projects/Rethink_RLVR/code/outputs/DeepScaleR-Qwen2.5-Math-7B/0607/DeepScaleR-RLVR-random0.5/ckpt
true
false
0.000009
2
1
false
false
false
false
false
0
0.2
1
0
0.6
1
AIME2025-TTT@8,AIME-TTT@8,AMC-TTT@8,MATH-TTT@1
true
1
false
true
-1
1
3072
true
false
0
prompt
-
0
answer
Killed
-
rx31
11h 59m 14s
-
false
5.0000e-7
2
1
constant
[0.9,0.95]
false
0
group_norm
true
0
true
/mmfs1/home/rx31/projects/Rethink_RLVR/code/outputs/DeepScaleR-Qwen2.5-Math-7B/0607/DeepScaleR-RLVR-box_only_format/ckpt
true
false
0.000009
2
1
false
false
false
false
false
0
0.2
1
0
0.6
1
AIME2025-TTT@8,AIME-TTT@8,AMC-TTT@8,MATH-TTT@1
true
1
false
true
-1
1
3072
true
false
0
prompt
-
0
answer
Killed
-
rx31
16h 15m 9s
-
false
5.0000e-7
2
1
constant
[0.9,0.95]
false
0
group_norm
true
0
true
/mmfs1/home/rx31/projects/Rethink_RLVR/code/outputs/DeepScaleR_mv_labeled_qwen2.5_math_7b_incorrect-Qwen2.5-Math-7B/0608/DeepScaleR_mv_labeled_qwen2.5_math_7b_incorrect-RLVR-math/ckpt
true
false
0.000009
2
1
false
false
false
false
false
0
0.2
1
0
0.6
1
AIME2025-TTT@8,AIME-TTT@8,AMC-TTT@8,MATH-TTT@1
true
1
false
true
-1
1
3072
true
false
0
prompt
-
0
answer
Killed
-
rx31
13h 7m 25s
-
false
5.0000e-7
2
1
constant
[0.9,0.95]
false
0
group_norm
true
0
true
/mmfs1/home/rx31/projects/Rethink_RLVR/code/outputs/DeepScaleR_mv_labeled_qwen2.5_math_7b-Qwen2.5-Math-7B/0608/DeepScaleR_mv_labeled_qwen2.5_math_7b-RLVR-math/ckpt
true
false
0.000009
2
1
false
false
false
false
false
0
0.2
1
0
0.6
1
AIME2025-TTT@8,AIME-TTT@8,AMC-TTT@8,MATH-TTT@1
true
1
false
true
-1
1
3072
true
false
0
prompt
-
0
answer
Killed
-
rx31
12h 3m 57s
-
false
5.0000e-7
2
1
constant
[0.9,0.95]
false
0
group_norm
true
0
true
/mmfs1/home/rx31/projects/Rethink_RLVR/code/outputs/DeepScaleR-Qwen2.5-Math-7B/0611/DeepScaleR-RLVR-math/ckpt
true
false
0.000009
2
1
false
false
false
false
false
0
0.2
1
0
0.6
1
AIME2025-TTT@8,AIME-TTT@8,AMC-TTT@8,MATH-TTT@1
true
1
false
true
-1
1
3072
true
false
0
prompt
-
0
answer
1-5
of 5