Skip to main content

throughput/tflops (23/08/22 09:36:43)

Created on August 22|Last edited on August 22

MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 0, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: trueMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 0, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: trueMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: trueMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 16, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 16, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, env.SP_TYPE: megatron, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 16, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: trueMODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 16, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 16, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, env.SP_TYPE: megatron, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 16, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT1T_2L, machine: login39.chn, world_size: 1, env.SP_TYPE: -, micro_batch_size: 1, seq_length: 2048, env.GAS: -, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 16, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, global_batch_size: 8, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: ds, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 8, zero_stage: 3, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: falseMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 2, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: trueMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: trueMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: ds, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 8, use_flash_attn: falseMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: ds, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 8, use_flash_attn: trueMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: ds, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 3, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 8, use_flash_attn: trueMODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, env.SP_TYPE: ds, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, global_batch_size: 1, zero_stage: 3, env.MPSIZE: 1, env.PPSIZE: 1, env.SPSIZE: 8, use_flash_attn: falseMODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 192000, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 32, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: trueMODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 64, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 288000, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 64, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: trueMODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 200000, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 32, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: trueMODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 288000, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 32, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: trueMODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, env.SP_TYPE: megatron, micro_batch_size: 1, seq_length: 300000, env.GAS: 1, global_batch_size: 1, zero_stage: 1, env.MPSIZE: 32, env.PPSIZE: 1, env.SPSIZE: 1, use_flash_attn: true0102030405060708090100110
Run set
133