Pszemraj's workspace
Runs
98
Name
74 visualized
train/epoch
eval/accuracy
train/loss
eval/loss
State
Tags
0.86
0.61042
2.1672
2.13371
Crashed
0.02
-
2.5944
-
Killed
1
0.61588
2.0481
1.98346
Finished
2
0.96965
0.092
0.08145
Finished
0.27
0.60461
2.1083
2.06976
Killed
2
-
1.3521
-
Finished
1.99
0.76692
0.8665
0.78507
Finished
1.15
-
1.0148
-
Killed
0.95
-
1.0374
-
Killed
2
0.49125
1.138
1.15812
Finished
2
0.50125
1.1123
1.15187
Failed
1
-
2.8129
2.92257
Finished
2
0.5488
3.1206
2.79839
Finished
2
0.54682
3.147
2.82977
Finished
0.21
0.15195
6.495
6.12077
Killed
-
-
-
-
Failed
-
-
-
-
Failed
2
0.55901
3.0212
2.70737
Finished
0.64
0.49002
3.6602
3.3604
Killed
0.51
0.47935
3.7206
3.44531
Killed
Notes
Runtime
_name_or_path
adam_beta2
adam_epsilon
attention_dropout
bf16
dropout
eval_steps
fp16
gradient_accumulation_steps
hub_model_id
learning_rate
logging_dir
lr_scheduler_type
output_dir
per_device_eval_batch_size
per_device_train_batch_size
qa_dropout
run_name
save_steps
seq_classif_dropout
sinusoidal_pos_embds
torch_compile
torch_compile_backend
torch_compile_mode
cls_token_id
embedding_size
finetuning_task
id2label.2
id2label.3
id2label.4
id2label.5
id2label.6
id2label.7
id2label.8
label2id.B-LOC
label2id.B-MISC
label2id.B-ORG
label2id.B-PER
label2id.I-LOC
label2id.I-MISC
label2id.I-ORG
label2id.I-PER
label2id.O
rotary_value
add_lm_hidden_dense_layer
add_token_type_embeddings
attention_activation
bidirectional
chunk_size
-
1d 3h 48m 32s
./roformerMLM_post_multitask
0.98
1.0000e-7
-
true
-
150
false
16
roformerMLM_post_multitask-minipile_4096-vN
0.00015
./runtime/masked/outputs-roformerMLM_post_multitask-minipile_4096-vN
polynomial
./runtime/masked/outputs-roformerMLM_post_multitask-minipile_4096-vN
4
4
-
roformerMLM_post_multitask-minipile_4096-vN
200
-
-
true
inductor
-
101
768
ner
I-PER
B-ORG
I-ORG
B-LOC
I-LOC
B-MISC
I-MISC
5
7
3
1
6
8
4
2
0
false
-
-
-
-
-
-
38m 7s
./roformerMLM_post_multitask
0.98
1.0000e-7
-
true
-
150
false
16
roformerMLM_post_multitask-minipile_4096-vN
0.00021
./runtime/masked/outputs-roformerMLM_post_multitask-minipile_4096-vN
polynomial
./runtime/masked/outputs-roformerMLM_post_multitask-minipile_4096-vN
4
4
-
roformerMLM_post_multitask-minipile_4096-vN
200
-
-
true
inductor
-
101
768
ner
I-PER
B-ORG
I-ORG
B-LOC
I-LOC
B-MISC
I-MISC
5
7
3
1
6
8
4
2
0
false
-
-
-
-
-
-
4h 11m 20s
./bert-plus-embedderForMLM
0.98
1.0000e-8
-
true
-
150
false
16
bert-plus-embedderForMLM-goodwiki-deduped-split_4096-usecache
0.0001
./runtime/masked/outputs-bert-plus-embedderForMLM-goodwiki-deduped-split_4096-usecache
linear
./runtime/masked/outputs-bert-plus-embedderForMLM-goodwiki-deduped-split_4096-usecache
4
4
-
bert-plus-embedderForMLM-goodwiki-deduped-split_4096-usecache
100
-
-
true
inductor
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
5m 6s
pszemraj/roformer-L8-v0.23-multitask_squadv2
0.995
1.0000e-8
-
false
-
100
false
16
roformer-L8-v0.23-multitask_squadv2-conll2003
0.00003
./conll2003_conll2003/runs/Feb07_02-00-11_c13a20ce-9725-401f-9ab6-9008f8465024
linear
./conll2003_conll2003/
8
4
-
roformer-L8-v0.23-multitask_squadv2-conll2003
200
-
-
true
inductor
-
101
768
ner
I-PER
B-ORG
I-ORG
B-LOC
I-LOC
B-MISC
I-MISC
5
7
3
1
6
8
4
2
0
false
-
-
-
-
-
-
1h 8m 52s
./bert-plus-embedderForMLM
0.98
1.0000e-8
-
true
-
150
false
16
bert-plus-embedderForMLM-goodwiki-deduped-split_4096-vN
0.0001
./runtime/masked/outputs-bert-plus-embedderForMLM-goodwiki-deduped-split_4096-vN
linear
./runtime/masked/outputs-bert-plus-embedderForMLM-goodwiki-deduped-split_4096-vN
4
4
-
bert-plus-embedderForMLM-goodwiki-deduped-split_4096-vN
100
-
-
true
inductor
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
2h 40m 16s
pszemraj/roformer-L8-v0.22-multitask_yahoo
0.995
1.0000e-8
-
true
-
150
false
16
roformer-L8-v0.22-multitask_yahoo-squad_v2
0.00003
./squad_v2_squad_v2/runs/Feb06_23-14-50_c13a20ce-9725-401f-9ab6-9008f8465024
linear
./squad_v2_squad_v2/
8
4
-
roformer-L8-v0.22-multitask_yahoo-squad_v2
200
-
-
true
inductor
-
101
768
text-classification
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
5m 14s
pszemraj/roformer-L8-v0.2-KI_2048-swag_mc
0.99
1.0000e-8
-
true
-
50
false
8
roformer-L8-v0.2-KI_2048-swag_mc-yahoo_answers_topics-long-text-cls
0.00003
./output/BEE-spoke-data/yahoo_answers_topics-long-text_roformer-L8-v0.2-KI_2048-swag_mc/runs/Feb06_22-56-59_c13a20ce-9725-401f-9ab6-9008f8465024
linear
./output/BEE-spoke-data/yahoo_answers_topics-long-text_roformer-L8-v0.2-KI_2048-swag_mc/
8
4
-
roformer-L8-v0.2-KI_2048-swag_mc-yahoo_answers_topics-long-text-cls
100
-
-
true
inductor
-
101
768
text-classification
LABEL_2
LABEL_3
LABEL_4
LABEL_5
LABEL_6
LABEL_7
LABEL_8
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
1m 8s
pszemraj/roformer-L8-v0.2-KI_2048-swag_mc
0.99
1.0000e-8
-
true
-
50
false
8
roformer-L8-v0.2-KI_2048-swag_mc-yahoo_answers_topics-long-text-cls
0.00003
./output/BEE-spoke-data/yahoo_answers_topics-long-text_roformer-L8-v0.2-KI_2048-swag_mc/runs/Feb06_22-55-26_c13a20ce-9725-401f-9ab6-9008f8465024
linear
./output/BEE-spoke-data/yahoo_answers_topics-long-text_roformer-L8-v0.2-KI_2048-swag_mc/
8
4
-
roformer-L8-v0.2-KI_2048-swag_mc-yahoo_answers_topics-long-text-cls
100
-
-
true
inductor
-
101
768
text-classification
LABEL_2
LABEL_3
LABEL_4
LABEL_5
LABEL_6
LABEL_7
LABEL_8
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
3m 4s
pszemraj/roformer-L8-v0.2-KI_2048-swag_mc
0.99
1.0000e-8
-
true
-
150
false
8
roformer-L8-v0.2-KI_2048-swag_mc-yahoo_answers_topics-long-text-cls
0.00003
./output/BEE-spoke-data/yahoo_answers_topics-long-text_roformer-L8-v0.2-KI_2048-swag_mc/runs/Feb06_22-51-22_c13a20ce-9725-401f-9ab6-9008f8465024
linear
./output/BEE-spoke-data/yahoo_answers_topics-long-text_roformer-L8-v0.2-KI_2048-swag_mc/
8
4
-
roformer-L8-v0.2-KI_2048-swag_mc-yahoo_answers_topics-long-text-cls
100
-
-
true
inductor
-
101
768
text-classification
LABEL_2
LABEL_3
LABEL_4
LABEL_5
LABEL_6
LABEL_7
LABEL_8
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
16m 55s
BEE-spoke-data/roformer-L8-v0.2-KI_2048
0.99
1.0000e-8
-
true
-
250
false
16
roformer-L8-v0.2-KI_2048-swag_mc-12heads
0.00003
./_/runs/Feb06_21-44-09_c13a20ce-9725-401f-9ab6-9008f8465024
linear
./_/
8
4
-
roformer-L8-v0.2-KI_2048-swag_mc-12heads
200
-
-
true
inductor
-
101
768
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
16m 24s
BEE-spoke-data/roformer-L8-v0.2-KI_2048
0.99
1.0000e-8
-
true
-
250
false
16
roformer-L8-v0.2-KI_2048-swag_mc
0.00003
./_/runs/Feb06_21-15-08_c13a20ce-9725-401f-9ab6-9008f8465024
linear
./_/
8
4
-
roformer-L8-v0.2-KI_2048-swag_mc
200
-
-
true
inductor
-
101
768
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
7h 29m 31s
BEE-spoke-data/roformer-L8-v0.1-simplewiki-WW
0.98
0.000001
-
true
-
150
false
16
roformer-L8-v0.1-simplewiki-WW-knowledge-inoc-concat-v1_2048-vN
0.0003
./runtime/masked/outputs-roformer-L8-v0.1-simplewiki-WW-knowledge-inoc-concat-v1_2048-vN
polynomial
./runtime/masked/outputs-roformer-L8-v0.1-simplewiki-WW-knowledge-inoc-concat-v1_2048-vN
8
8
-
roformer-L8-v0.1-simplewiki-WW-knowledge-inoc-concat-v1_2048-vN
200
-
-
true
inductor
-
101
768
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
1h 3m 26s
BEE-spoke-data/roformerRV-L8-initNLI
0.98
0.000001
-
false
-
100
false
4
roformerRV-L8-initNLI-simple_wikipedia_LM_1024-WW
0.0004
./runtime/masked/outputs-roformerRV-L8-initNLI-simple_wikipedia_LM_1024-WW
constant_with_warmup
./runtime/masked/outputs-roformerRV-L8-initNLI-simple_wikipedia_LM_1024-WW
8
8
-
roformerRV-L8-initNLI-simple_wikipedia_LM_1024-WW
100
-
-
true
inductor
-
101
768
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
true
-
-
-
-
-
-
54m 44s
pszemraj/roformer-base-initNLI
0.98
0.000001
-
true
-
100
false
4
roformer-base-initNLI-simple_wikipedia_LM_1024-WW
0.0004
./runtime/masked/outputs-roformer-base-initNLI-simple_wikipedia_LM_1024-WW
constant_with_warmup
./runtime/masked/outputs-roformer-base-initNLI-simple_wikipedia_LM_1024-WW
8
8
-
roformer-base-initNLI-simple_wikipedia_LM_1024-WW
100
-
-
true
inductor
-
101
768
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
9m 17s
pszemraj/roformer-base-initNLI
0.98
0.000001
-
false
-
100
false
8
roformer-base-initNLI-simple_wikipedia_LM_1024-WW
0.0004
./runtime/masked/outputs-roformer-base-initNLI-simple_wikipedia_LM_1024-WW
constant_with_warmup
./runtime/masked/outputs-roformer-base-initNLI-simple_wikipedia_LM_1024-WW
6
6
-
roformer-base-initNLI-simple_wikipedia_LM_1024-WW
100
-
-
true
inductor
-
101
768
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
52s
pszemraj/roformer-base-initNLI
0.98
0.000001
-
false
-
100
false
4
roformer-base-initNLI-simple_wikipedia_LM_1024-WW
0.0004
./runtime/masked/outputs-roformer-base-initNLI-simple_wikipedia_LM_1024-WW
constant_with_warmup
./runtime/masked/outputs-roformer-base-initNLI-simple_wikipedia_LM_1024-WW
8
8
-
roformer-base-initNLI-simple_wikipedia_LM_1024-WW
100
-
-
true
inductor
-
101
768
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
49s
pszemraj/roformer-base-initNLI
0.98
0.000001
-
false
-
100
false
4
roformer-base-initNLI-simple_wikipedia_LM_1024-WW
0.0004
./runtime/masked/outputs-roformer-base-initNLI-simple_wikipedia_LM_1024-WW
constant_with_warmup
./runtime/masked/outputs-roformer-base-initNLI-simple_wikipedia_LM_1024-WW
8
8
-
roformer-base-initNLI-simple_wikipedia_LM_1024-WW
100
-
-
true
inductor
-
101
768
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
1h 4m 48s
BEE-spoke-data/roformer-L8-initNLI
0.98
0.000001
-
false
-
100
false
4
roformer-L8-initNLI-simple_wikipedia_LM_1024-WW
0.0004
./runtime/masked/outputs-roformer-L8-initNLI-simple_wikipedia_LM_1024-WW
constant_with_warmup
./runtime/masked/outputs-roformer-L8-initNLI-simple_wikipedia_LM_1024-WW
8
8
-
roformer-L8-initNLI-simple_wikipedia_LM_1024-WW
100
-
-
true
inductor
-
101
768
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
-
17m 14s
pszemraj/roformerRV-L8-sAH
0.98
0.000001
-
false
-
100
false
4
roformerRV-L8-sAH-simple_wikipedia_LM_512-WW
0.0004
./runtime/masked/outputs-roformerRV-L8-sAH-simple_wikipedia_LM_512-WW
constant_with_warmup
./runtime/masked/outputs-roformerRV-L8-sAH-simple_wikipedia_LM_512-WW
16
16
-
roformerRV-L8-sAH-simple_wikipedia_LM_512-WW
100
-
-
true
inductor
-
101
768
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
true
-
-
-
-
-
-
14m 10s
BEE-spoke-data/roformer-L8-initNLI
0.98
0.000001
-
false
-
100
false
4
roformer-L8-initNLI-simple_wikipedia_LM_512-WW
0.0004
./runtime/masked/outputs-roformer-L8-initNLI-simple_wikipedia_LM_512-WW
constant_with_warmup
./runtime/masked/outputs-roformer-L8-initNLI-simple_wikipedia_LM_512-WW
16
16
-
roformer-L8-initNLI-simple_wikipedia_LM_512-WW
100
-
-
true
inductor
-
101
768
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
false
-
-
-
-
-
1-20
of 83