Annavettoruzzo's group workspace
Group: ablation-1e18
State
Notes
User
Tags
Created
Runtime
Sweep
account_for_embedding_in_pipeline_split
account_for_loss_in_pipeline_split
accumulate_allreduce_grads_in_fp32
adam_beta1
adam_beta2
adam_eps
add_bias_linear
add_position_embedding
add_qkv_bias
adlr_autoresume
adlr_autoresume_interval
align_grad_reduce
align_param_gather
app_tag_run_version
apply_layernorm_1p
apply_query_key_layer_scaling
apply_residual_connection_post_layernorm
apply_rope_fusion
async_tensor_model_parallel_allreduce
attention_backend
attention_dropout
attention_softmax_in_fp32
auto_detect_ckpt_format
barrier_with_L1_time
bert_binary_head
bert_embedder_type
bf16
bias_dropout_fusion
bias_gelu_fusion
bias_swiglu_fusion
biencoder_projection_dim
biencoder_shared_query_context_model
calc_ft_timeouts
calculate_per_token_loss
check_for_large_grads
check_for_nan_in_loss_and_grad
check_for_spiky_loss
ckpt_assume_constant_structure
ckpt_convert_update_legacy_dist_opt_format
ckpt_format
ckpt_fully_parallel_load
ckpt_fully_parallel_save
ckpt_fully_parallel_save_deprecated
classes_fraction
Finished
Add notes...
zichun
1h 24m 51s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
zichun
35m 23s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
zichun
1h 10m 14s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
zichun
30m 44s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
zichun
24m 55s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
zichun
20m 29s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
zichun
23m 33s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
zichun
18m 11s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
haok
32m 32s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
haok
19m 56s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
haok
22m 23s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
haok
26m 38s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
haok
43m 33s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
haok
36m 57s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
haok
31m 22s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
Finished
Add notes...
haok
27m 39s
-
false
false
true
0.9
0.999
1.0000e-8
false
true
false
false
1000
true
false
0.0.0
false
false
false
true
true
auto
0
false
false
true
true
megatron
true
true
false
true
0
false
false
false
false
true
false
false
false
torch_dist
false
true
false
1
1-16
of 16