Skip to main content
l2hmc-qcd
Projects
Megatron-DS-Benchmarking
Reports
throughput/tflops (23/08/22 09:36:43)
Log in
Sign up
Share
Comment
Star
Share
Comment
Star
throughput/tflops (23/08/22 09:36:43)
Sam Foreman
Created on August 22
|
Last edited on August 22
Comment
throughput/tflops
throughput/tflops
MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 0, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 0, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 16, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 16, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, env.SP_TYPE: megatron, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 16, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true
MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 16, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 16, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, env.SP_TYPE: megatron, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 16, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT1T_2L, machine: login39.chn, world_size: 1, env.SP_TYPE: -, micro_batch_size: 1, seq_length: 2048, env.GAS: -, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 16, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: ds, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 8, zero_stage: 3, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: false
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 2, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: ds, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 8, use_flash_attn: false
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: ds, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 8, use_flash_attn: true
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: ds, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 3, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 8, use_flash_attn: true
MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: ds, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 3, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 8, use_flash_attn: false
MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 192000, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 32, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true
MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 64, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 288000, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 64, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true
MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 200000, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 32, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true
MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 288000, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 32, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true
MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 300000, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 32, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true
0
10
20
30
40
50
60
70
80
90
100
110
Run set
133
Add a comment
110