Skip to main content

throughput/samples_per_sec (23/08/14 08:28:33)

Created on August 14|Last edited on August 14

MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 4, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 4, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT6_7B, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT6_7B, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8MODEL_SIZE: GPT145B_6L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 2, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 2, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B-4layers, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B8L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_4L, machine: Polaris, world_size: 8, micro_batch_size: 2, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 2, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_4L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8MODEL_SIZE: GPT145B_4L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_2L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_4L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16MODEL_SIZE: GPT145B_6L, machine: Polaris, world_size: 32, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 32, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16MODEL_SIZE: GPT6_7B, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_6L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT6_7B, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8MODEL_SIZE: GPT6_7B, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16MODEL_SIZE: GPT145B_4L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8MODEL_SIZE: GPT145B_6L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8MODEL_SIZE: GPT145B4L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT6_7B, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8MODEL_SIZE: GPT145B_2L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16MODEL_SIZE: GPT125M, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 32, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 32, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 16, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 16, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT2_7B, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_8L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 0, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 32, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, micro_batch_size: 4, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 32, zero_stage: 1, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_1L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 16, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, micro_batch_size: 2, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 16, zero_stage: 1, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_2L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_2L, machine: login39.chn, world_size: 1, micro_batch_size: 1, seq_length: 2048, env.GAS: -, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 1, env.SP_TYPE: -, env.SPSIZE: 1MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 16, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 16, zero_stage: 2, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: Polaris, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT1T_4L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 8, env.GLOBAL_BATCH: 8, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 8, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 8, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 16, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 2, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 16MODEL_SIZE: GPT145B_5L, machine: ThetaGPU, world_size: 8, micro_batch_size: 1, seq_length: 2048, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 3, env.MPSIZE: 1, env.SP_TYPE: ds, env.SPSIZE: 8MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, micro_batch_size: 1, seq_length: 192000, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 32, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, micro_batch_size: 1, seq_length: 200000, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 32, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, micro_batch_size: 1, seq_length: 288000, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 32, env.SP_TYPE: megatron, env.SPSIZE: 1MODEL_SIZE: GPT33B, machine: ThetaGPU, world_size: 32, micro_batch_size: 1, seq_length: 300000, env.GAS: 1, env.GLOBAL_BATCH: 1, zero_stage: 1, env.MPSIZE: 32, env.SP_TYPE: megatron, env.SPSIZE: 101002003004005006007008009001,0001,1001,2001,300
Run set
135