Borisd13's workspace
Runs
3,071
Name
3 visualized
Notes
per_device_train_batch_size
learning_rate
train/step
State
34
0.0001
107300
Crashed
34
0.0001
107000
Crashed
34
0.0001
0
Finished
34
0.0001
137300
Killed
34
0.0001
142000
Killed
34
0.0001
308400
Killed
34
0.0001
333600
Killed
34
0.0001
25500
Killed
34
0.0001
0
Failed
34
0.0001
72600
Killed
34
0.0001
106900
Killed
34
0.0001
1400
Killed
34
0.0001
0
Failed
34
0.0001
0
Failed
34
0.0001
-
Failed
34
0.0001
337600
Crashed
40
0.0001
64100
Crashed
40
0.0001
471
Killed
60
0.0001
0
Failed
60
0.0001
0
Failed
60
0.0001
0
Crashed
60
0.0001
0
Failed
40
0.0001
89
Killed
40
0.0001
266
Killed
40
0.0001
653
Killed
40
0.0001
43
Killed
40
0.0001
153
Killed
40
0.0001
111
Killed
40
0.0001
-
Failed
40
0.0001
112
Killed
40
0.0001
-
Finished
40
0.0001
400
Killed
40
0.0001
0
Failed
44
0.0001
0
Failed
48
0.0001
0
Failed
50
0.0001
0
Failed
60
0.0001
0
Failed
50
0.0001
0
Failed
60
0.0001
0
Failed
50
0.0001
0
Failed
60
0.0001
0
Failed
60
0.0001
0
Failed
60
0.0001
-
Failed
60
0.0001
-
Failed
60
0.0001
-
Failed
-
-
-
Finished
-
-
-
Finished
60
0.0001
-
Failed
60
0.0001
291
Killed
60
0.0001
19
Killed
batch_size_per_step
embeddings_only
eval_steps
gradient_accumulation_steps
log_model
logging_steps
lr_offset
lr_resume_offset
model_config._name_or_path
model_config.activation_dropout
model_config.activation_function
model_config.add_cross_attention
model_config.attention_dropout
model_config.bos_token_id
model_config.chunk_size_feed_forward
model_config.d_model
model_config.decoder_attention_heads
model_config.decoder_ffn_dim
model_config.decoder_layers
model_config.decoder_start_token_id
model_config.diversity_penalty
model_config.do_sample
model_config.dropout
model_config.early_stopping
model_config.encoder_attention_heads
model_config.encoder_ffn_dim
model_config.encoder_layers
model_config.encoder_no_repeat_ngram_size
model_config.encoder_vocab_size
model_config.eos_token_id
model_config.force_ln_scale
model_config.gradient_checkpointing
model_config.id2label.0
model_config.id2label.1
model_config.image_length
model_config.image_vocab_size
model_config.init_std
model_config.is_decoder
model_config.is_encoder_decoder
model_config.label2id.LABEL_0
model_config.label2id.LABEL_1
model_config.length_penalty
model_config.ln_positions
model_config.ln_type
model_config.max_length
model_config.max_text_length
model_config.min_length
model_config.model_type
model_config.no_repeat_ngram_size
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50300
16385
false
false
LABEL_0
LABEL_1
256
16400
0.02
false
true
0
1
1
normformer
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50300
16385
false
false
LABEL_0
LABEL_1
256
16400
0.02
false
true
0
1
1
normformer
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50300
16385
false
false
LABEL_0
LABEL_1
256
16400
0.02
false
true
0
1
1
subln
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50300
16385
false
true
LABEL_0
LABEL_1
256
16400
0.02
false
true
0
1
1
normformer
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50300
16385
false
false
LABEL_0
LABEL_1
256
16400
0.02
false
true
0
1
1
subln
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50300
16385
false
false
LABEL_0
LABEL_1
256
16400
0.02
false
true
0
1
1
subln
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50300
16385
false
true
LABEL_0
LABEL_1
256
16400
0.02
false
true
0
1
1
normformer
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50300
16385
false
false
LABEL_0
LABEL_1
256
16400
0.02
false
true
0
1
1
subln
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50300
16385
false
false
LABEL_0
LABEL_1
256
16400
0.02
false
true
0
1
1
normformer
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50300
16385
false
false
LABEL_0
LABEL_1
256
16400
0.02
false
true
0
1
1
normformer
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50300
16385
false
false
LABEL_0
LABEL_1
256
16400
0.02
false
true
0
1
1
normformer
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
normformer
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
normformer
layernorm
257
64
257
dallebart
0
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
normformer
layernorm
257
64
257
dallebart
0
-
false
400
3
false
100
0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
816
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
960
false
400
3
false
100
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
960
false
400
3
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
960
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2560
12
16384
0
true
0
false
16
2560
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
960
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
960
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
960
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
640
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
640
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
2
16384
0
true
0
false
16
2730
2
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
640
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
4
16384
0
true
0
false
16
2730
4
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
640
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
640
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
640
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
-
false
400
2
false
1
0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
640
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
640
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2560
12
16384
0
true
0
false
16
2560
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
640
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
false
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
640
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
704
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
768
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
800
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
960
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
800
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
4096
12
16384
0
true
0
false
16
4096
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16391
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
960
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
4096
12
16384
0
true
0
false
16
4096
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16391
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
800
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
960
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
960
false
400
2
false
1
0
-
0
gelu
false
0
16385
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16385
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
-
false
400
2
false
1
0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
400
2
false
1
0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
400
2
false
1
0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
400
2
false
1
0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
960
false
400
2
false
1
0
-
0
gelu
false
0
16384
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16384
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
960
false
400
2
false
1
0
-
0
gelu
false
0
16384
0
1024
16
2730
12
16384
0
true
0
false
16
2730
12
0
50264
16384
false
true
LABEL_0
LABEL_1
256
16384
0.02
false
true
0
1
1
swinv2
layernorm
257
64
257
dallebart
0
1-50
of 3,071