Kastan's group workspace
Group: Aug-05__12:37
Name
64 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
BATCH_SIZE
LEARNING_RATE
LOG_PATH
NUM_EPOCHS
SEQ_LENGTH
TOTAL_BATCH_SIZE
VOCAB_SIZE
WARMUP_EPOCHS
WEIGHT_DECAY
clip_grad_norm
conda_env_name
data_dir
fp16.mode
gpt2_8B
gpt2_large
gpt2_medium
gpt2_xl
gradient_accumulation
model.checkpoint
model.decoder_dtype
model.embed_dtype
model.head_dtype
model.layernorm_dtype
model.max_position_embeddings
model.vocab_size
model_dtypes.decoder_dtype
model_dtypes.embed_dtype
model_dtypes.head_dtype
model_dtypes.layernorm_dtype
num_gpus_per_node
optimizer.lr
optimizer.weight_decay
parallel.pipeline
quant_gpt2_8B
quant_gpt2_micro
quant_gpt2_small
quant_gpt2_xl
total_gpus
MICRO_BATCH_SIZE
NUM_MICRO_BATCHES
PIPELINE_SIZE
TENSOR_PARALLEL_MODE
TENSOR_PARALLEL_SIZE
loss.type
Crashed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
12s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Crashed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
3s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Failed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
39s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Failed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
20s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Crashed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
3s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Failed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
36s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Crashed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
11s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Crashed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
13s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Failed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
39s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Crashed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
4s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Crashed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
3s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Failed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
40s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Failed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
40s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Failed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
39s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Failed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
20s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Failed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
38s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Crashed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
11s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Failed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
38s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Failed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
12s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
Crashed
-
kastan
Aug-05__12:37
BATCH_SIZE16
MICRO_BATCH_SIZE=4
NUM_EPOCHS=3
NUM_MICRO_BATCHES=16
PP=2
SLURM=513717
TP=8
WORLD_SIZE=64
12s
-
16
0.00015
./quant_gpt2_3d_tp8_bs16_lr0.00015/
3
1024
64
50304
1
0.01
1
col_ai_quant
/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json
AMP_TYPE.NAIVE
-
-
-
-
4
false
torch.float16
torch.float16
torch.float16
torch.float16
1024
50304
torch.float16
torch.float16
torch.float16
torch.float16
4
0.00015
0.01
2
titans.model.quant_gpt.quant_gpt.quant_gpt2_8B
-
-
titans.model.quant_gpt.quant_gpt.quant_gpt2_xl
64
4
16
2
3d
8
titans.loss.lm_loss.gpt_lmloss.GPTLMLoss
1-20
of 64