Brandony's workspace
Runs
87
Name
4 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
activation
adlr_autoresume
adlr_autoresume_interval
apply_query_key_layer_scaling
attention_config
attention_dropout
attention_softmax_in_fp32
batch_size
bias_dropout_fusion
bias_gelu_fusion
char_level_ppl
checkpoint_activations
checkpoint_factor
checkpoint_in_cpu
checkpoint_num_layers
checkpoint_scale
checkpoint_validation_with_forward_pass
clip_grad
config_files.125M.yml
config_files.410M_baseline.yml
config_files.local_setup.yml
contiguous_checkpointing
coord_check
create_moe_param_group
curriculum_seqlen
data_impl
data_path
deepscale
deepspeed
deepspeed_activation_checkpointing
deepspeed_mpi
deepspeed_slurm
detect_nvlink_pairs
distributed_backend
dump_state
dynamic_loss_scale
enable_expert_tensor_parallelism
eod_mask_loss
eval_interval
eval_iters
eval_results_prefix
expert_interval
finetune
force_multi
Crashed
-
brandony
22s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
32
false
false
false
true
10000
false
1
linear
false
1
-
-
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
Finished
-
brandony
1m 52s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
8
false
false
false
true
10000
false
1
linear
false
1
-
{
"pipe_parallel_size": 0,
"model_parallel_size": 1,
"context_parallel_size": 1,
"num_layers": 12,
"hidden_size": 256,
"num_attention_heads": 8,
"seq_length": 1024,
"max_position_embeddings": 1024,
"pos_emb": "rotary",
"rotary_pct": 0.25,
"no_weight_tying": true,
"gpt_j_residual": false,
"output_layer_parallelism": "column",
"attention_config": [[["ring"], 12]],
# "scaled_upper_triang_masked_softmax_fusion": true,
# "bias_gelu_fusion": true,
"init_method": "small_init",
"output_layer_init_method": "wang_init",
"use_qk_layernorm": False,
"optimizer": {
"type": "Adam",
"params": {
"lr": 0.0003,
"betas": [0.9, 0.95],
"eps": 1.0e-8
}
},
"min_lr": 0.00003,
"zero_optimization": {
"stage": 1,
"allgather_partitions": true,
"allgather_bucket_size": 500000000,
"overlap_comm": true,
"reduce_scatter": true,
"reduce_bucket_size": 500000000,
"contiguous_gradients": true,
"cpu_offload": false,
},
"train_micro_batch_size_per_gpu": 8,
"data_impl": "mmap",
"num_workers": 1,
"checkpoint_activations": true,
"checkpoint_num_layers": 1,
"partition_activations": false,
"synchronize_each_layer": true,
"gradient_clipping": 1.0,
"weight_decay": 0.1,
"hidden_dropout": 0,
"attention_dropout": 0,
# "fp16": {
# "fp16": true,
# "enabled": true,
# "loss_scale": 0,
# "loss_scale_window": 1000,
# "initial_scale_power": 12,
# "hysteresis": 2,
# "min_loss_scale": 1
# },
"precision": "bfloat16",
"fp32_allreduce": true,
"bf16": {
"enabled": true
},
"train_iters": 1000,
"lr_decay_iters": 1000,
"distributed_backend": "nccl",
"lr_decay_style": "cosine",
"warmup": 0.01,
"checkpoint_factor": 10000,
"extra_save_iters": [100],
"eval_interval": 100,
"eval_iters": 50,
"log_interval": 10,
"steps_per_print": 10,
"wall_clock_breakdown": true,
# "save": "/checkpoint/hielab/brandon/ckpt",
# "load": "/checkpoint/hielab/brandon/ckpt",
#"load": "/mnt/hdd-0/tiny-pythia/ckpts/pythia-14m",
"log_grad_norm": true,
# "data_path": "/mnt/ssd-2/pile_deduped/pile_20B_tokenizer_text_document",
# "train-data-paths": ["/mnt/ssd-2/pile_deduped/pile_20B_tokenizer_text_document"],
# "valid-data-paths": ["/mnt/ssd-2/pile_deduped/pile_20B_tokenizer_text_document"],
# "test-data-paths": ["/mnt/ssd-2/pile_deduped/pile_20B_tokenizer_text_document"],
"data_path": "/home/brandon/gpt-neox/data/enwik8/enwik8_text_document",
"vocab_file": "/home/brandon/gpt-neox/data/gpt2-vocab.json",
"merge_file": "/home/brandon/gpt-neox/data/gpt2-merges.txt",
"log_dir": "/scratch/hielab/brandon",
"use_wandb": true,
"wandb_host": "https://api.wandb.ai",
"wandb_group": "410M_enwiki8_test_seq_parallel_pp1",
"wandb_project": "neox"
}
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
Crashed
-
brandony
17s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Crashed
-
brandony
17s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
brandony
2m 34s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
32
false
false
false
true
10000
false
1
linear
false
1
-
-
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
Finished
-
brandony
2m 49s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
32
false
false
false
true
10000
false
1
linear
false
1
-
-
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
Finished
-
brandony
2m 35s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
32
false
false
false
true
10000
false
1
linear
false
1
-
-
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
Crashed
-
brandony
30s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
32
false
false
false
true
10000
false
1
linear
false
1
-
-
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
Finished
-
brandony
2m 31s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
32
false
false
false
true
10000
false
1
linear
false
1
-
-
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
Finished
-
brandony
16s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
32
false
false
false
true
10000
false
1
linear
false
1
-
-
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
Finished
-
brandony
2m 35s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
32
false
false
false
true
10000
false
1
linear
false
1
-
-
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
Finished
-
brandony
2m 29s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
32
false
false
false
true
10000
false
1
linear
false
1
-
-
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
Crashed
-
brandony
23s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Crashed
-
brandony
23s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Crashed
-
brandony
23s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Crashed
-
brandony
25s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
32
false
false
false
true
10000
false
1
linear
false
1
-
-
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
Crashed
-
brandony
23s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Crashed
-
brandony
25s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Crashed
-
brandony
25s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
brandony
3m 12s
-
gelu
false
1000
false
["ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring","ring"]
0
false
32
false
false
false
true
10000
false
1
linear
false
1
-
-
-
false
false
true
0
mmap
/home/brandon/gpt-neox/data/enwik8/enwik8_text_document
false
true
true
false
false
false
nccl
false
true
false
false
100
50
2
false
false
1-20
of 87