Skip to main content

Snapshot Apr 4 2021, 10:3pm

Created on April 5|Last edited on April 5

Section 1


meta
49m 58s
53m 22s
19h 22m 23s
config
0.95
0.95
0.95
1.0000e-8
1.0000e-8
1.0000e-8
false
false
true
-
-
local
{"train_batch_size":32,"train_micro_batch_size_per_gpu":4,"gradient_accumulation_steps":1,"optimizer":{"type":"Adam","params":{"lr":0.0003,"max_grad_norm":1,"betas":[0.9,0.95]}},"fp16":{"fp16":true,"enabled":true,"loss_scale":0,"loss_scale_window":1000,"hysteresis":2,"min_loss_scale":1},"gradient_clipping":1,"zero_optimization":{"stage":0,"allgather_partitions":true,"allgather_bucket_size":500000000,"overlap_comm":true,"reduce_scatter":true,"reduce_bucket_size":500000000,"contiguous_gradients":true,"cpu_offload":false},"steps_per_print":10,"wall_clock_breakdown":true,"deepspeed":true}
{"train_batch_size":32,"train_micro_batch_size_per_gpu":4,"gradient_accumulation_steps":1,"optimizer":{"type":"Adam","params":{"lr":0.0003,"max_grad_norm":1,"betas":[0.9,0.95]}},"fp16":{"fp16":true,"enabled":true,"loss_scale":0,"loss_scale_window":1000,"hysteresis":2,"min_loss_scale":1},"gradient_clipping":1,"zero_optimization":{"stage":1,"allgather_partitions":true,"allgather_bucket_size":500000000,"overlap_comm":true,"reduce_scatter":true,"reduce_bucket_size":500000000,"contiguous_gradients":true,"cpu_offload":false},"steps_per_print":10,"wall_clock_breakdown":true,"deepspeed":true}
{"train_batch_size":24,"train_micro_batch_size_per_gpu":4,"gradient_accumulation_steps":1,"optimizer":{"type":"cpu_adam","params":{"lr":0.0003,"max_grad_norm":1,"betas":[0.9,0.95]}},"fp16":{"fp16":true,"enabled":true,"loss_scale":0,"loss_scale_window":1000,"hysteresis":2,"min_loss_scale":1},"gradient_clipping":1,"zero_optimization":{"stage":3,"cpu_offload":true,"cpu_offload_params":true,"overlap_comm":true,"contiguous_gradients":true,"stage3_max_live_parameters":6000000,"stage3_max_reuse_distance":100000000,"stage3_prefetch_bucket_size":200000,"stage3_param_persitance_threshold":100000,"reduce_bucket_size":3000000,"sub_group_size":1000000},"steps_per_print":10,"wall_clock_breakdown":true,"deepspeed":true}
-
-
false
false
false
true
cd0f0b0
cd0f0b0
a21388d
-
-
128
-
-
1000
-
-
/mnt/ssd-cluster/checkpoints
3.5
3.5
2.5
-
-
0.15
type:transformer
type:transformer
-
-
-
0.1
3.5
3.5
2.5
1.0000e-8
1.0000e-8
1.0000e-8
1.0000e-8
1.0000e-8
1.0000e-8
0.1
0.1
0.1
-
-
false
SWDKQ6x8Rs5E2xzRZ9NxiV
G4NuteJNEsu5fhd2Htcs6a
n6HSC59hVUhE8BUMPsurvw
8
8
6
500000000
500000000
3000000
0
1
3
summary
flops
s
16108818889067.146
16628937012415.434
3043893855463.7813
samples
12.2362
12.63128
2.95296
2996
3201
69734
2k4k6k8kStep4e+126e+128e+121e+131.2e+131.4e+131.6e+131.8e+13
2k4k6k8kStep4681012
Run set
22



Run set
22



Run set
22