Kastan's group workspace
Group: gpt2_2d_TP4_PP2_8B
Name
24 visualized
Name: gpt2_2d_TP4_PP2_8B
Name: gpt2_2d_TP4_PP2_8B
24
State
Notes
User
Tags
Created
Runtime
Sweep
BATCH_SIZE
HIDDEN_SIZE
LEARNING_RATE
MODE
NUM_EPOCHS
NUM_MICRO_BATCHES
PIPELINE
SEQ_LEN
SEQ_LENGTH
TENSOR_PARALLEL
TENSOR_PARALLEL_MODE
TENSOR_PARALLEL_SIZE
TENSOR_SHAPE
TOTAL_BATCH_SIZE
VOCAB_SIZE
WARMUP_EPOCHS
WEIGHT_DECAY
backend
clip_grad_norm
colossal_config_file
config
data_dir
fp16.mode
from_torch
gpt2_8B
gpt2_small
gpt2_xl
gradient_accumulation
loss.type
loss_fn.type
model.checkpoint
model.dtype
model.fuse_scale_mask_softmax
model.max_position_embeddings
model.type
model.vocab_size
optimizer.lr
optimizer.type
optimizer.weight_decay
parallel.pipeline
parallel.tensor.mode
parallel.tensor.size
zero.model_config.reuse_fp16_shard
zero.model_config.shard_strategy
Crashed
-
kastan
36m 38s
-
8
-
-
2d
60
4
2
1024
-
4
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
/u/kastanday/colossal_ai/raw_json_backup/train_data_FINAL.json
AMP_TYPE.NAIVE
false
titans.model.gpt.gpt.gpt2_8B
titans.model.gpt.gpt.gpt2_small
-
-
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
-
true
-
-
-
titans.model.gpt.gpt.gpt2_8B
-
0.00015
torch.optim.adam.Adam
0.01
2
2d
4
-
-
Crashed
-
kastan
32m 26s
-
16
-
-
2d
60
4
2
1024
-
4
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
/u/kastanday/colossal_ai/raw_json_backup/train_data_FINAL.json
AMP_TYPE.NAIVE
false
titans.model.gpt.gpt.gpt2_8B
titans.model.gpt.gpt.gpt2_small
-
-
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
-
true
-
-
-
titans.model.gpt.gpt.gpt2_8B
-
0.00015
torch.optim.adam.Adam
0.01
2
2d
4
-
-
Crashed
-
kastan
32m 31s
-
16
-
-
2d
60
4
2
1024
-
4
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
/u/kastanday/colossal_ai/raw_json_backup/train_data_FINAL.json
AMP_TYPE.NAIVE
false
titans.model.gpt.gpt.gpt2_8B
titans.model.gpt.gpt.gpt2_small
-
-
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
-
true
-
-
-
titans.model.gpt.gpt.gpt2_8B
-
0.00015
torch.optim.adam.Adam
0.01
2
2d
4
-
-
Crashed
-
kastan
31m 55s
-
16
-
-
2d
60
4
2
1024
-
4
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
/u/kastanday/colossal_ai/raw_json_backup/train_data_FINAL.json
AMP_TYPE.NAIVE
false
titans.model.gpt.gpt.gpt2_8B
titans.model.gpt.gpt.gpt2_small
-
-
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
-
true
-
-
-
titans.model.gpt.gpt.gpt2_8B
-
0.00015
torch.optim.adam.Adam
0.01
2
2d
4
-
-
Crashed
-
kastan
32m 4s
-
16
-
-
2d
60
4
2
1024
-
4
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
/u/kastanday/colossal_ai/raw_json_backup/train_data_FINAL.json
AMP_TYPE.NAIVE
false
titans.model.gpt.gpt.gpt2_8B
titans.model.gpt.gpt.gpt2_small
-
-
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
-
true
-
-
-
titans.model.gpt.gpt.gpt2_8B
-
0.00015
torch.optim.adam.Adam
0.01
2
2d
4
-
-
Crashed
-
kastan
30m 48s
-
16
-
-
2d
60
4
2
1024
-
4
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
/u/kastanday/colossal_ai/raw_json_backup/train_data_FINAL.json
AMP_TYPE.NAIVE
false
titans.model.gpt.gpt.gpt2_8B
titans.model.gpt.gpt.gpt2_small
-
-
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
-
true
-
-
-
titans.model.gpt.gpt.gpt2_8B
-
0.00015
torch.optim.adam.Adam
0.01
2
2d
4
-
-
Crashed
-
kastan
30m 32s
-
16
-
-
2d
60
4
2
1024
-
4
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
/u/kastanday/colossal_ai/raw_json_backup/train_data_FINAL.json
AMP_TYPE.NAIVE
false
titans.model.gpt.gpt.gpt2_8B
titans.model.gpt.gpt.gpt2_small
-
-
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
-
true
-
-
-
titans.model.gpt.gpt.gpt2_8B
-
0.00015
torch.optim.adam.Adam
0.01
2
2d
4
-
-
Crashed
-
kastan
32m 1s
-
16
-
-
2d
60
4
2
1024
-
4
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
/u/kastanday/colossal_ai/raw_json_backup/train_data_FINAL.json
AMP_TYPE.NAIVE
false
titans.model.gpt.gpt.gpt2_8B
titans.model.gpt.gpt.gpt2_small
-
-
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
-
true
-
-
-
titans.model.gpt.gpt.gpt2_8B
-
0.00015
torch.optim.adam.Adam
0.01
2
2d
4
-
-
Crashed
-
kastan
32m 31s
-
16
-
-
2d
60
4
2
1024
-
4
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
/u/kastanday/colossal_ai/raw_json_backup/train_data_FINAL.json
AMP_TYPE.NAIVE
false
titans.model.gpt.gpt.gpt2_8B
titans.model.gpt.gpt.gpt2_small
-
-
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
-
true
-
-
-
titans.model.gpt.gpt.gpt2_8B
-
0.00015
torch.optim.adam.Adam
0.01
2
2d
4
-
-
Failed
-
kastan
7s
-
4
-
-
2d
60
4
2
1024
-
4
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
/u/kastanday/colossal_ai/raw_json_backup/train_data_FINAL.json
AMP_TYPE.NAIVE
false
titans.model.gpt.gpt.gpt2_8B
titans.model.gpt.gpt.gpt2_small
-
-
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
-
true
-
-
-
titans.model.gpt.gpt.gpt2_8B
-
0.00015
torch.optim.adam.Adam
0.01
2
2d
4
-
-
Failed
-
kastan
14s
-
4
-
-
2d
60
4
2
1024
-
4
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
gpt2_configs/gpt2_2d_TP4_PP2_8B.py
/u/kastanday/colossal_ai/raw_json_backup/train_data_FINAL.json
AMP_TYPE.NAIVE
false
titans.model.gpt.gpt.gpt2_8B
titans.model.gpt.gpt.gpt2_small
-
-
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
-
true
-
-
-
titans.model.gpt.gpt.gpt2_8B
-
0.00015
torch.optim.adam.Adam
0.01
2
2d
4
-
-
1-1
of 1