Ctl's workspace
Runs
134
Name
4 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
add_nsp
async_worker
attention_dropout_checkpoint
current_run_id
data_loader_type
dataset_path
deepspeed
deepspeed_config.fp16.enabled
deepspeed_config.fp16.hysteresis
deepspeed_config.fp16.loss_scale
deepspeed_config.fp16.loss_scale_window
deepspeed_config.fp16.min_loss_scale
deepspeed_config.gradient_clipping
deepspeed_config.steps_per_print
deepspeed_config.train_batch_size
deepspeed_config.train_micro_batch_size_per_gpu
deepspeed_config.wall_clock_breakdown
deepspeed_transformer_kernel
device
do_validation
ds_config.fp16.enabled
ds_config.fp16.hysteresis
ds_config.fp16.loss_scale
ds_config.fp16.loss_scale_window
ds_config.fp16.min_loss_scale
ds_config.gradient_clipping
ds_config.steps_per_print
ds_config.train_batch_size
ds_config.train_micro_batch_size_per_gpu
ds_config.wall_clock_breakdown
early_exit_time_marker
early_stop_eval_loss
early_stop_time
exp_start_marker
finetune_checkpoint_at_end
fp16
fp16_backend
fp16_opt
gelu_checkpoint
gradient_accumulation_steps
gradient_clipping
job_name
learning_rate
local_rank
Finished
-
ctl
3d 12h 17m 58s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4096
32
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
32
false
9999999999
2.1
720
761847.42754
true
true
ds
O2
false
16
0
pretraining_experiment-
0.001
0
Finished
-
ctl
24m 39s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4056
39
false
false
cuda:0
true
true
2
0
1000
1
0
100
4056
39
false
9999999999
2.1
720
728514.71905
true
true
ds
O2
false
13
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1h 57m 56s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
721317.91667
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 23h 50m 9s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
549059.15356
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
2d 1h 23m 37s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
371195.32388
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
10h 27m 58s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
333478.38449
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
14h 39m 6s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
280694.62172
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 10h 25m 53s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
156695.65513
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 18h 56m 4s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
2076.64659
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
2d 17h 28m
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4096
16
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
16
false
9999999999
2.1
720
27682982.63414
true
true
ds
O2
false
32
0
pretraining_experiment-
0.001
0
Finished
-
ctl
2d 21h 58m 28s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
28
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
28
false
9999999999
2.1
720
27415722.09761
true
true
ds
O2
false
18
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1h 45m 48s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4096
32
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
32
false
9999999999
2.1
720
27408859.56309
true
true
ds
O2
false
16
0
pretraining_experiment-
0.001
0
Finished
-
ctl
3d 3h 14m 46s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
27062395.74252
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
3h 21m 54s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
27050178.92872
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
9h 30m 11s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
27015886.75362
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
2h 18m 31s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
27007546.06946
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
11h 23m 46s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
26966409.52443
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1mo 3d 8h 26m 7s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
19291918.75482
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
3d 4h 8m 23s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
19017766.33163
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
11d 3h 25m 44s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
17841495.93932
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
10d 15h 12m 10s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
16769210.80041
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
3d 8h 37m 52s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
15623409.36317
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
22h 30s
-
false
true
false
per_device
bin_data
true
true
2
0
1000
1
0
100
4096
256
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
256
false
9999999999
2.1
720
533033.54643
true
true
ds
O2
false
16
0
pretraining_experiment-
0.001
0
Crashed
-
ctl
21h 30m 10s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4096
16
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
16
false
9999999999
2.1
720
15545897.65583
true
true
ds
O2
false
32
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 14h 55m 32s
-
false
true
false
per_device
bin_data
true
true
2
0
1000
1
0
100
4096
256
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
256
false
9999999999
2.1
720
391932.04698
true
true
ds
O2
false
16
0
pretraining_experiment-
0.001
0
Crashed
-
ctl
1d 22h 38m 47s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4096
32
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
32
false
9999999999
2.1
720
15375314.56989
true
true
ds
O2
false
16
0
pretraining_experiment-
0.001
0
Failed
-
ctl
4m 25s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4096
32
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
32
false
9999999999
2.1
720
15373368.27116
true
true
ds
O2
false
16
0
pretraining_experiment-
0.001
0
Crashed
-
ctl
41m 50s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4096
32
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
32
false
9999999999
2.1
720
15370726.24935
true
true
ds
O2
false
16
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1h 58m 20s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
15328615.92298
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 8h 18m 24s
-
false
true
false
per_device
bin_data
true
true
2
0
1000
1
0
100
4000
160
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
160
false
9999999999
2.1
720
263576.74819
true
true
ds
O2
false
25
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 13h 42m 52s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4032
6
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
6
false
9999999999
2.1
720
15161116.38714
true
true
ds
O2
false
84
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1m 32s
-
false
true
false
dist
word_bin_data
true
true
2
0
1000
1
0
100
4096
8
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
8
false
9999999999
2.1
720
15160928.44416
true
true
ds
O2
false
64
0
pretraining_experiment-
0.001
0
Failed
-
ctl
2d 9h 30m 34s
-
false
true
false
per_device
bin_data
true
true
2
0
1000
1
0
100
4096
128
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
128
false
9999999999
2.1
720
28445.39534
true
true
ds
O2
false
32
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 1h 4m 20s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4000
20
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
20
false
9999999999
2.1
720
6819292.95654
true
true
ds
O2
false
25
0
pretraining_experiment-
0.001
0
Finished
-
ctl
2d 12h 18m 41s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4000
50
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
50
false
9999999999
2.1
720
14896150.02467
true
true
ds
O2
false
10
0
pretraining_experiment-
0.001
0
Finished
-
ctl
22h 15m 37s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4096
128
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
128
false
9999999999
2.1
720
14783703.55602
true
true
ds
O2
false
4
0
pretraining_experiment-
0.001
0
Finished
-
ctl
17m 56s
-
false
true
false
per_device
dataset/bin_data/
true
true
2
0
1000
1
0
100
4096
64
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
64
false
9999999999
2.1
720
153.42785
true
true
ds
O2
false
64
0
pretraining_experiment-
0.001
0
Crashed
-
ctl
18m 38s
-
false
true
false
per_device
dataset/bin_data/
true
true
2
0
1000
1
0
100
4096
64
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
64
false
9999999999
2.1
720
164334.36661
true
true
ds
O2
false
64
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 2h 49m 47s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4096
128
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
128
false
9999999999
2.1
720
14259027.53935
true
true
ds
O2
false
4
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 2h 43m 30s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
84
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
84
false
9999999999
2.1
720
14120587.6938
true
true
ds
O2
false
6
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 2h 47m 35s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
84
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
84
false
9999999999
2.1
720
14013395.71477
true
true
ds
O2
false
6
0
pretraining_experiment-
0.001
0
Crashed
-
ctl
32s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
84
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
84
false
9999999999
2.1
720
14013282.06828
true
true
ds
O2
false
6
0
pretraining_experiment-
0.001
0
Failed
-
ctl
1m 40s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
84
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
84
false
9999999999
2.1
720
13918998.5612
true
true
ds
O2
false
6
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 2h 47m 46s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
84
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
84
false
9999999999
2.1
720
13785520.82515
true
true
ds
O2
false
6
0
pretraining_experiment-
0.001
0
Finished
-
ctl
6h 13m 10s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
84
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
84
false
9999999999
2.1
720
13763074.79151
true
true
ds
O2
false
6
0
pretraining_experiment-
0.001
0
Finished
-
ctl
3d 2h 49m 37s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4096
32
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
32
false
9999999999
2.1
720
13429390.29682
true
true
ds
O2
false
16
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 7h 59m 5s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0.3
100
4032
84
false
false
cuda:0
true
true
2
0
1000
1
0.3
100
4032
84
false
9999999999
2.1
720
13292534.53859
true
true
ds
O2
false
6
0.3
pretraining_experiment-
0.001
0
Finished
-
ctl
20h 19m 34s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0.3
100
4032
72
false
false
cuda:0
true
true
2
0
1000
1
0.3
100
4032
72
false
9999999999
2.1
720
13219100.4654
true
true
ds
O2
false
7
0.3
pretraining_experiment-
0.001
0
Finished
-
ctl
7d 3h 53m 36s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4048
92
false
false
cuda:0
true
true
2
0
1000
1
0
100
4048
92
false
9999999999
2.1
720
3279279.77963
true
true
ds
O2
false
11
0
pretraining_experiment-
0.001
0
Finished
-
ctl
2d 20h 26m 48s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
12947514.69377
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
2d 22h 16s
-
false
true
false
per_device
bin_data
true
true
2
0
1000
1
0
100
4096
256
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
256
false
9999999999
2.1
720
1406051.14155
true
true
ds
O2
false
16
0
pretraining_experiment-
0.001
0
Finished
-
ctl
2d 17h 15m 1s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0.3
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0.3
100
4032
42
false
9999999999
2.1
720
12697345.55686
true
true
ds
O2
false
12
0.3
pretraining_experiment-
0.001
0
Finished
-
ctl
15h 48m 26s
-
false
true
false
per_device
bin_data
true
true
2
0
1000
1
0
100
4000
80
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
80
false
9999999999
2.1
720
1151899.89098
true
true
ds
O2
false
50
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 15m 28s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0.3
100
4008
167
false
false
cuda:0
true
true
2
0
1000
1
0.3
100
4008
167
false
9999999999
2.1
720
12603111.63302
true
true
ds
O2
false
3
0.3
pretraining_experiment-
0.0005
0
Failed
-
ctl
11h 24m 41s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0.3
100
4008
167
false
false
cuda:0
true
true
2
0
1000
1
0.3
100
4008
167
false
9999999999
2.1
720
12472220.2607
true
true
ds
O2
false
3
0.3
pretraining_experiment-
0.0008
0
Finished
-
ctl
1d 18h 36m 21s
-
false
true
false
per_device
bin_data
true
true
2
0
1000
1
0
100
4050
450
false
false
cuda:0
true
true
2
0
1000
1
0
100
4050
450
false
9999999999
2.1
720
968278.11768
true
true
ds
O2
false
9
0
pretraining_experiment-
0.001
0
Failed
-
ctl
10h 58m 25s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0.3
100
4008
167
false
false
cuda:0
true
true
2
0
1000
1
0.3
100
4008
167
false
9999999999
2.1
720
12430986.72127
true
true
ds
O2
false
3
0.3
pretraining_experiment-
0.001
0
Failed
-
ctl
19h 22m 10s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0.5
100
4096
128
false
false
cuda:0
true
true
2
0
1000
1
0.5
100
4096
128
false
9999999999
2.1
720
12345881.26204
true
true
ds
O2
false
4
0.5
pretraining_experiment-
0.0005
0
Finished
-
ctl
1d 19h 31m 15s
-
false
true
false
per_device
bin_data
true
true
2
0
1000
1
0
100
4056
312
false
false
cuda:0
true
true
2
0
1000
1
0
100
4056
312
false
9999999999
2.1
720
805123.79397
true
true
ds
O2
false
13
0
pretraining_experiment-
0.001
0
Failed
-
ctl
12h 14m 18s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0.5
100
4096
128
false
false
cuda:0
true
true
2
0
1000
1
0.5
100
4096
128
false
9999999999
2.1
720
12282790.64231
true
true
ds
O2
false
4
0.5
pretraining_experiment-
0.0008
0
Finished
-
ctl
10d 10h 3m 37s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
56
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
56
false
9999999999
2.1
720
2377798.0441
true
true
ds
O2
false
18
0
pretraining_experiment-
0.001
0
Failed
-
ctl
1m 42s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4096
32
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
32
false
9999999999
2.1
720
12118846.30977
true
true
ds
O2
false
16
0
pretraining_experiment-
0.001
0
Failed
-
ctl
1m 44s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
36
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
36
false
9999999999
2.1
720
12118671.68307
true
true
ds
O2
false
14
0
pretraining_experiment-
0.001
0
Failed
-
ctl
1m 42s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
36
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
36
false
9999999999
2.1
720
12118481.60912
true
true
ds
O2
false
14
0
pretraining_experiment-
0.001
0
Finished
-
ctl
3d 9h 34m 56s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0.5
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0.5
100
4032
42
false
9999999999
2.1
720
11786356.73445
true
true
ds
O2
false
12
0.5
pretraining_experiment-
0.001
0
Failed
-
ctl
1d 23h 57m 52s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0.8
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0.8
100
4032
42
false
9999999999
2.1
720
11610997.56445
true
true
ds
O2
false
12
0.8
pretraining_experiment-
0.0008
0
Finished
-
ctl
7d 3h 8m 13s
-
false
true
false
dist
./bin_data
true
true
2
0
1000
1
0
100
4032
72
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
72
false
2400
6
1800
1759480.36414
true
true
ds
O2
false
14
0
pretraining_experiment-
0.001
0
Finished
-
ctl
16h 40m 49s
-
false
true
false
per_device
bin_data
true
true
2
0
1000
1
0.5
100
4000
80
false
false
cuda:0
true
true
2
0
1000
1
0.5
100
4000
80
false
9999999999
2.1
720
23201.54532
true
true
ds
O2
false
50
0.5
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 3h 32m 28s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
1
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
1
100
4032
42
false
9999999999
2.1
720
11499655.68154
true
true
ds
O2
false
12
1
pretraining_experiment-
0.001
0
Finished
-
ctl
2h 1m 48s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
42
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
42
false
9999999999
2.1
720
11485008.46807
true
true
ds
O2
false
12
0
pretraining_experiment-
0.001
0
Finished
-
ctl
5m 39s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
36
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
36
false
9999999999
2.1
720
11484634.88964
true
true
ds
O2
false
14
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 21s
-
false
true
false
dist
./bin_data
true
true
2
0
1000
1
0
100
4000
100
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
100
false
24
6
180
1665452.23051
true
true
ds
O2
false
10
0
pretraining_experiment-
0.001
0
Finished
-
ctl
9h 19m 56s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
1
100
4000
80
false
false
cuda:0
true
true
2
0
1000
1
1
100
4000
80
false
9999999999
2.1
720
215538.76334
true
true
ds
O2
false
50
1
pretraining_experiment-
0.001
0
Finished
-
ctl
1h 8m 27s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
1
100
4000
100
false
false
cuda:0
true
true
2
0
1000
1
1
100
4000
100
false
9999999999
2.1
720
178188.88962
true
true
ds
O2
false
40
1
pretraining_experiment-
0.001
0
Finished
-
ctl
4d 3h 4m 1s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
36
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
36
false
9999999999
2.1
720
11101313.31206
true
true
ds
O2
false
14
0
pretraining_experiment-
0.001
0
Finished
-
ctl
6h 36m 16s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
1
100
4000
50
false
false
cuda:0
true
true
2
0
1000
1
1
100
4000
50
false
9999999999
2.1
720
16811.28532
true
true
ds
O2
false
80
1
pretraining_experiment-
0.001
0
Finished
-
ctl
6m 29s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
36
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
36
false
9999999999
2.1
720
11063217.82901
true
true
ds
O2
false
14
0
pretraining_experiment-
0.001
0
Finished
class CharAwareEmbedding(nn.Module):
    """Layers for a character-level embedding built from parallel convolutions.

    The constructor creates three groups of sub-modules:

    * ``char_embed`` - an embedding table with 256 entries of width 32,
      using index 0 as the padding index.
    * ``char_conv`` - six ``nn.Conv2d`` layers, each taking 32 input
      channels, with kernel sizes ``(1, w)`` for ``w`` in
      ``[1, 3, 6, 9, 12, 15]`` and output channels
      ``[32, 64, 96, 128, 256, 512]`` respectively.
    * ``output`` - a linear projection from the summed convolution output
      channels (1088) to ``config.hidden_size``.

    Only the constructor is visible in this fragment; how the layers are
    combined in the forward pass is not shown here.

    Args:
        config: object exposing a ``hidden_size`` attribute, used as the
            output width of the final linear projection.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config

        # NOTE(review): 256 entries presumably correspond to byte values,
        # with id 0 reserved for padding - confirm against the tokenizer.
        self.char_embed = nn.Embedding(256, 32, padding_idx=0)

        # One convolution per filter width; heights are all 1, so each
        # kernel is a (1, width) window.
        conv_out_channels = [32, 64, 96, 128, 256, 512]
        conv_fh = [1, 1, 1, 1, 1, 1]
        conv_fw = [1, 3, 6, 9, 12, 15]
        self.char_conv = nn.ModuleList([
            nn.Conv2d(in_channels=32, out_channels=out_channels, kernel_size=kernel, stride=1)
            for out_channels, kernel in zip(conv_out_channels, zip(conv_fh, conv_fw))
        ])

        # The projection input width equals the total number of conv output
        # channels across all filter widths.
        self.output = nn.Linear(sum(conv_out_channels), config.hidden_size)
ctl
2d 1h 53m 47s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
1
100
4000
100
false
false
cuda:0
true
true
2
0
1000
1
1
100
4000
100
false
9999999999
2.1
720
2992903.8794
true
true
ds
O2
false
40
1
pretraining_experiment-
0.001
0
Finished
-
ctl
11h 23m 21s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4000
100
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
100
false
9999999999
2.1
720
2949834.47118
true
true
ds
O2
false
40
0
pretraining_experiment-
0.001
0
Failed
-
ctl
1m 42s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4000
100
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
100
false
9999999999
2.1
720
2949005.19431
true
true
ds
O2
false
40
0
pretraining_experiment-
0.001
0
Finished
-
ctl
4h 49m 4s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4000
100
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
100
false
9999999999
2.1
720
2931616.90669
true
true
ds
O2
false
40
0
pretraining_experiment-
0.001
0
Failed
-
ctl
1m 52s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4000
100
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
100
false
9999999999
2.1
720
2917217.50634
true
true
ds
O2
false
40
0
pretraining_experiment-
0.001
0
Failed
-
ctl
1m 49s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4000
100
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
100
false
9999999999
2.1
720
2867245.88765
true
true
ds
O2
false
40
0
pretraining_experiment-
0.001
0
Finished
-
ctl
2m 37s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4000
100
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
100
false
9999999999
2.1
720
2867035.2689
true
true
ds
O2
false
40
0
pretraining_experiment-
0.001
0
Finished
-
ctl
43s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4000
100
false
false
cuda:0
true
true
2
0
1000
1
0
100
4000
100
false
9999999999
2.1
720
2866938.31823
true
true
ds
O2
false
40
0
pretraining_experiment-
0.001
0
Finished
-
ctl
3h 28m 20s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4096
64
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
64
false
9999999999
2.1
720
2833343.70911
true
true
ds
O2
false
64
0
pretraining_experiment-
0.001
0
Finished
-
ctl
3m 25s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4096
64
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
64
false
9999999999
2.1
720
2833066.4844
true
true
ds
O2
false
64
0
pretraining_experiment-
0.001
0
Finished
-
ctl
5m 33s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4096
64
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
64
false
9999999999
2.1
720
2832635.74802
true
true
ds
O2
false
64
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1m 6s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4096
64
false
false
cuda:0
true
true
2
0
1000
1
0
100
4096
64
false
9999999999
2.1
720
2832525.48351
true
true
ds
O2
false
64
0
pretraining_experiment-
0.001
0
Finished
-
ctl
4d 2m 36s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
36
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
36
false
9999999999
2.1
720
10653332.97948
true
true
ds
O2
false
14
0
pretraining_experiment-
0.001
0
Crashed
-
ctl
1d 4h 21m 18s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
36
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
36
false
9999999999
2.1
720
10551069.53643
true
true
ds
O2
false
14
0
pretraining_experiment-
0.001
0
Finished
-
ctl
15h 2m 22s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4070
110
false
false
cuda:0
true
true
2
0
1000
1
0
100
4070
110
false
9999999999
2.1
720
2552996.26771
true
true
ds
O2
false
37
0
pretraining_experiment-
0.001
0
Failed
-
ctl
1m 50s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4070
110
false
false
cuda:0
true
true
2
0
1000
1
0
100
4070
110
false
9999999999
2.1
720
2552759.95355
true
true
ds
O2
false
37
0
pretraining_experiment-
0.001
0
Finished
-
ctl
13h 12m 48s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4070
110
false
false
cuda:0
true
true
2
0
1000
1
0
100
4070
110
false
9999999999
2.1
720
2505133.68922
true
true
ds
O2
false
37
0
pretraining_experiment-
0.001
0
Failed
-
ctl
1m 45s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4070
110
false
false
cuda:0
true
true
2
0
1000
1
0
100
4070
110
false
9999999999
2.1
720
2504552.29117
true
true
ds
O2
false
37
0
pretraining_experiment-
0.001
0
Finished
-
ctl
45m 19s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4070
110
false
false
cuda:0
true
true
2
0
1000
1
0
100
4070
110
false
9999999999
2.1
720
2501790.55727
true
true
ds
O2
false
37
0
pretraining_experiment-
0.001
0
Finished
-
ctl
3d 22h 44m 50s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4032
36
false
false
cuda:0
true
true
2
0
1000
1
0
100
4032
36
false
9999999999
2.1
720
10195978.47699
true
true
ds
O2
false
14
0
pretraining_experiment-
0.001
0
Crashed
-
ctl
2m 40s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4080
34
false
false
cuda:0
true
true
2
0
1000
1
0
100
4080
34
false
9999999999
2.1
720
10195782.08393
true
true
ds
O2
false
15
0
pretraining_experiment-
0.001
0
Failed
-
ctl
1m 42s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4056
39
false
false
cuda:0
true
true
2
0
1000
1
0
100
4056
39
false
9999999999
2.1
720
10195613.4702
true
true
ds
O2
false
13
0
pretraining_experiment-
0.001
0
Finished
-
ctl
1d 2h 10m 2s
-
false
true
false
dist
bin_data
true
true
2
0
1000
1
0
100
4070
110
false
false
cuda:0
true
true
2
0
1000
1
0
100
4070
110
false
9999999999
2.1
720
2210593.18571
true
true
ds
O2
false
37
0
pretraining_experiment-
0.001
0
1-100
of 134