Kastan's group workspace
Group: gpt2_zero3
Name
24 visualized
Name: gpt2_zero3
Name: gpt2_zero3
24
State
Notes
User
Tags
Created
Runtime
Sweep
BATCH_SIZE
HIDDEN_SIZE
LEARNING_RATE
MODE
NUM_EPOCHS
NUM_MICRO_BATCHES
PIPELINE
SEQ_LEN
SEQ_LENGTH
TENSOR_PARALLEL
TENSOR_PARALLEL_MODE
TENSOR_PARALLEL_SIZE
TENSOR_SHAPE
TOTAL_BATCH_SIZE
VOCAB_SIZE
WARMUP_EPOCHS
WEIGHT_DECAY
backend
clip_grad_norm
colossal_config_file
config
data_dir
fp16.mode
from_torch
gpt2_8B
gpt2_small
gpt2_xl
gradient_accumulation
loss.type
loss_fn.type
model.checkpoint
model.dtype
model.fuse_scale_mask_softmax
model.max_position_embeddings
model.type
model.vocab_size
optimizer.lr
optimizer.type
optimizer.weight_decay
parallel.pipeline
parallel.tensor.mode
parallel.tensor.size
zero.model_config.reuse_fp16_shard
zero.model_config.shard_strategy
Failed
kastan
1d 57m 45s
-
2
-
-
-
60
-
-
1024
-
-
-
-
-
-
-
-
-
nccl
-
gpt2_configs/gpt2_zero3.py
gpt2_configs/gpt2_zero3.py
/u/kastan/colossal/data/train_data_FINAL.json
-
true
-
titans.model.gpt.gpt.gpt2_small
-
-
-
-
true
-
-
-
titans.model.gpt.gpt.gpt2_small
-
0.00015
colossalai.nn.optimizer.hybrid_adam.HybridAdam
0.01
-
-
-
true
["<colossalai.zero.shard_utils.tensor_shard_strategy.TensorShardStrategy object at 0x1457cac434c0>","<colossalai.zero.shard_utils.tensor_shard_strategy.TensorShardStrategy object at 0x1462da3f3310>","<colossalai.zero.shard_utils.tensor_shard_strategy.TensorShardStrategy object at 0x148dc484e190>","<colossalai.zero.shard_utils.tensor_shard_strategy.TensorShardStrategy object at 0x14c5a94c72b0>","<colossalai.zero.shard_utils.tensor_shard_strategy.TensorShardStrategy object at 0x14dc247654c0>","<colossalai.zero.shard_utils.tensor_shard_strategy.TensorShardStrategy object at 0x150e7506b340>","<colossalai.zero.shard_utils.tensor_shard_strategy.TensorShardStrategy object at 0x152b4cc525e0>","<colossalai.zero.shard_utils.tensor_shard_strategy.TensorShardStrategy object at 0x1531bb88c430>","<colossalai.zero.shard_utils.tensor_shard_strategy.TensorShardStrategy object at 0x1537ebac04f0>","<colossalai.zero.shard_utils.tensor_shard_strategy.TensorShardStrategy object at 0x1549b257e5e0>"]
1-1
of 1