Saforem2's workspace
Runs
1,333
Name
6 visualized
MODEL_SIZE: GPT1T_2L
MODEL_SIZE: GPT1T_2L
1
2
MODEL_SIZE: GPT1T_4L
MODEL_SIZE: GPT1T_4L
1
1
MODEL_SIZE: GPT1T_16L
MODEL_SIZE: GPT1T_16L
1
1
MODEL_SIZE: GPT1T_32L
MODEL_SIZE: GPT1T_32L
1
1
MODEL_SIZE: GPT1T_64L
MODEL_SIZE: GPT1T_64L
1
1
1-5
of 5Throughput/SamplesPerSec (large graph)
5
throughput
11
system/gpu.0.memoryAllocated, system/gpu.1.memoryAllocated, system/gpu.2.memoryAllocated, system/gpu.3.memoryAllocated
system/gpu.0.memoryAllocated, system/gpu.1.memoryAllocated, system/gpu.2.memoryAllocated, system/gpu.3.memoryAllocated
MODEL_SIZE: GPT1T_2L, env.MACHINE: Perlmutter, world_size: 8, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, seq_length: 2048, use_flash_attn: false, env.GAS: 8 GPU 0 Memory Allocated (%)
MODEL_SIZE: GPT1T_2L, env.MACHINE: Perlmutter, world_size: 8, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, seq_length: 2048, use_flash_attn: false, env.GAS: 8 GPU 1 Memory Allocated (%)
MODEL_SIZE: GPT1T_2L, env.MACHINE: Perlmutter, world_size: 8, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, seq_length: 2048, use_flash_attn: false, env.GAS: 8 GPU 2 Memory Allocated (%)
MODEL_SIZE: GPT1T_2L, env.MACHINE: Perlmutter, world_size: 8, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 8, global_batch_size: 16, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 1, seq_length: 2048, use_flash_attn: false, env.GAS: 8 GPU 3 Memory Allocated (%)
MODEL_SIZE: GPT1T_4L, env.MACHINE: Perlmutter, world_size: 16, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 16, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 2, seq_length: 2048, use_flash_attn: false, env.GAS: 16 GPU 0 Memory Allocated (%)
MODEL_SIZE: GPT1T_4L, env.MACHINE: Perlmutter, world_size: 16, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 16, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 2, seq_length: 2048, use_flash_attn: false, env.GAS: 16 GPU 1 Memory Allocated (%)
MODEL_SIZE: GPT1T_4L, env.MACHINE: Perlmutter, world_size: 16, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 16, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 2, seq_length: 2048, use_flash_attn: false, env.GAS: 16 GPU 2 Memory Allocated (%)
MODEL_SIZE: GPT1T_4L, env.MACHINE: Perlmutter, world_size: 16, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 16, global_batch_size: 32, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 2, seq_length: 2048, use_flash_attn: false, env.GAS: 16 GPU 3 Memory Allocated (%)
MODEL_SIZE: GPT1T_16L, env.MACHINE: Perlmutter, world_size: 64, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 64, global_batch_size: 128, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 8, seq_length: 2048, use_flash_attn: false, env.GAS: 64 GPU 0 Memory Allocated (%)
MODEL_SIZE: GPT1T_16L, env.MACHINE: Perlmutter, world_size: 64, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 64, global_batch_size: 128, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 8, seq_length: 2048, use_flash_attn: false, env.GAS: 64 GPU 1 Memory Allocated (%)
MODEL_SIZE: GPT1T_16L, env.MACHINE: Perlmutter, world_size: 64, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 64, global_batch_size: 128, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 8, seq_length: 2048, use_flash_attn: false, env.GAS: 64 GPU 2 Memory Allocated (%)
MODEL_SIZE: GPT1T_16L, env.MACHINE: Perlmutter, world_size: 64, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 64, global_batch_size: 128, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 8, seq_length: 2048, use_flash_attn: false, env.GAS: 64 GPU 3 Memory Allocated (%)
MODEL_SIZE: GPT1T_32L, env.MACHINE: Perlmutter, world_size: 128, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 128, global_batch_size: 256, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 16, seq_length: 2048, use_flash_attn: false, env.GAS: 128 GPU 0 Memory Allocated (%)
MODEL_SIZE: GPT1T_32L, env.MACHINE: Perlmutter, world_size: 128, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 128, global_batch_size: 256, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 16, seq_length: 2048, use_flash_attn: false, env.GAS: 128 GPU 1 Memory Allocated (%)
MODEL_SIZE: GPT1T_32L, env.MACHINE: Perlmutter, world_size: 128, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 128, global_batch_size: 256, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 16, seq_length: 2048, use_flash_attn: false, env.GAS: 128 GPU 2 Memory Allocated (%)
MODEL_SIZE: GPT1T_32L, env.MACHINE: Perlmutter, world_size: 128, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 128, global_batch_size: 256, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 16, seq_length: 2048, use_flash_attn: false, env.GAS: 128 GPU 3 Memory Allocated (%)
MODEL_SIZE: GPT1T_64L, env.MACHINE: Perlmutter, world_size: 256, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 256, global_batch_size: 512, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 32, seq_length: 2048, use_flash_attn: false, env.GAS: 256 GPU 0 Memory Allocated (%)
MODEL_SIZE: GPT1T_64L, env.MACHINE: Perlmutter, world_size: 256, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 256, global_batch_size: 512, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 32, seq_length: 2048, use_flash_attn: false, env.GAS: 256 GPU 1 Memory Allocated (%)
MODEL_SIZE: GPT1T_64L, env.MACHINE: Perlmutter, world_size: 256, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 256, global_batch_size: 512, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 32, seq_length: 2048, use_flash_attn: false, env.GAS: 256 GPU 2 Memory Allocated (%)
MODEL_SIZE: GPT1T_64L, env.MACHINE: Perlmutter, world_size: 256, micro_batch_size: 2, deepspeed_configuration.gradient_accumulation_steps: 256, global_batch_size: 512, zero_stage: 1, env.MPSIZE: 8, env.PPSIZE: 32, seq_length: 2048, use_flash_attn: false, env.GAS: 256 GPU 3 Memory Allocated (%)
To pick up a draggable item, press the space bar.
While dragging, use the arrow keys to move the item.
Press space again to drop the item in its new position, or press escape to cancel.