kastan

Kastan's group workspace

Group: Aug-05__11:13

Failed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

32s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

33s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

33s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

36s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

32s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

33s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=8

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

36s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

33s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

34s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

34s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Failed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

34s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=8

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

35s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

37s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

33s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

37s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

34s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=8

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

34s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

33s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

34s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

Crashed

kastan

Aug-05__11:13

BATCH_SIZE32

NUM_EPOCHS=3

NUM_MICRO_BATCHES=4

SLURM=513418

TP=4

WORLD_SIZE=32

3y ago

33s

0.00015

./quant_gpt2_2.5d_tp4_bs32_lr0.00015/

1024

128

50304

0.01

col_ai_quant

/u/kastanday/LLM-Distributed-Quantization/datasets/small-gpt-dataset.json

AMP_TYPE.NAIVE

false

torch.float32

torch.bfloat16

torch.float32

1024

50304

torch.float32

torch.bfloat16

torch.float32

0.00015

0.01

titans.model.quant_gpt.quant_gpt.quant_gpt2_8B

titans.model.quant_gpt.quant_gpt.quant_gpt2_xl

2.5d

titans.loss.lm_loss.gpt_lmloss.GPTLMLoss

1-20

of 32