Skip to main content
l2hmc-qcd
Projects
Megatron-DS-Benchmarking
Reports
throughput/samples_per_sec (23/08/14 08:32:05)
Log in
Sign up
Share
Comment
Star
Share
Comment
Star
throughput/samples_per_sec (23/08/14 08:32:05)
Sam Foreman
Created on August 14
|
Last edited on August 14
Comment
throughput/samples_per_sec
throughput/samples_per_sec
MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 4, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 4, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT6_7B, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT6_7B, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8
MODEL_SIZE: GPT145B_6L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8
MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 2, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 2, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B-4layers, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B8L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_4L, machine: Polaris, world_size: 8, micro_batch_size: 2, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 2, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_4L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8
MODEL_SIZE: GPT145B_4L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_2L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_4L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16
MODEL_SIZE: GPT145B_6L, machine: Polaris, world_size: 32, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 32, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16
MODEL_SIZE: GPT6_7B, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_6L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT6_7B, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8
MODEL_SIZE: GPT6_7B, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16
MODEL_SIZE: GPT145B_4L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8
MODEL_SIZE: GPT145B_6L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16
MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8
MODEL_SIZE: GPT145B4L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT6_7B, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8
MODEL_SIZE: GPT145B_2L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16
MODEL_SIZE: GPT125M, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 32, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 32, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 16, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 16, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT2_7B, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_8L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 32, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 32, zero_stage: 1, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 16, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 16, zero_stage: 1, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_2L, machine: login39.chn, world_size: 1, micro_batch_size: 1, seq_length: 2048, env.GAS: -, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 1, env.SP_TYPE: -, env.SPSIZE: 1
MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 16, zero_stage: 2, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 8, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8
MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, micro_batch_size: 1, seq_length: 192000, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 32, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, micro_batch_size: 1, seq_length: 200000, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 32, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, micro_batch_size: 1, seq_length: 288000, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 32, env.SP_TYPE: megatron, env.SPSIZE: 1
MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, micro_batch_size: 1, seq_length: 300000, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 32, env.SP_TYPE: megatron, env.SPSIZE: 1
0
100
200
300
400
500
600
700
800
900
1,000
1,100
1,200
1,300
Run set
135
Add a comment
1,300