Framolfese's workspace
Runs
394
Name
394 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
Adafactor
Adafactor/_target_
Adafactor/lr
Adafactor/lr_scheduler/num_training_steps
Adafactor/lr_scheduler/num_warmup_steps
Adafactor/relative_step
Adafactor/scale_parameter
Adafactor/weight_decay
RAdam
Radam/_target_
Radam/lr
Radam/weight_decay
_recursive_
lr_scheduler
lr_scheduler/num_training_steps
lr_scheduler/num_warmup_steps
model
model/_target_
model/aggregation
model/coreference_mode
model/coreference_wait_n_epochs
model/deterministic_span_extraction
model/freeze_encoder
model/huggingface_model_name
model/incremental_model_hidden_size
model/incremental_model_num_layers
model/language_model
model/linear_layer_hidden_size
model/mention_mode
model/mode
model/pos_weight
model/pretrained_ckpt_path
model/representation
model/span_coref_aggregation
model/span_representation
model/topk
model/transformer_freeze
model/use_gold_training
opt
optimizer/_target_
optimizer/lr
optimizer/weight_decay
epoch
lr-Adafactor
Finished
-
giuliano-martinelli
2h 41m 49s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 3000, 'num_training_steps': 30000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'mdeberta-v3-base', 'huggingface_model_name': 'microsoft/mdeberta-v3-base', 'freeze_encoder': True, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
39
-
Finished
-
giuliano-martinelli
4m 3s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 3000, 'num_training_steps': 30000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'mdeberta-v3-base', 'huggingface_model_name': 'microsoft/mdeberta-v3-base', 'freeze_encoder': True, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
0
-
Finished
-
giuliano-martinelli
1m 18s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 3000, 'num_training_steps': 30000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'mdeberta-v3-base', 'huggingface_model_name': 'microsoft/mdeberta-v3-base', 'freeze_encoder': True, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
0
-
Finished
-
giuliano-martinelli
1d 1h 27m 50s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 3000, 'num_training_steps': 30000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'mdeberta-v3-base', 'huggingface_model_name': 'microsoft/mdeberta-v3-base', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
102
-
Crashed
-
giuliano-martinelli
5h 14m 33s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 6000, 'num_training_steps': 80000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'longformer-large-4096', 'huggingface_model_name': 'allenai/longformer-large-4096', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
-
-
Crashed
-
giuliano-martinelli
1h 30m 32s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 3000, 'num_training_steps': 30000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
-
-
Crashed
-
giuliano-martinelli
25s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0
-
Crashed
-
giuliano-martinelli
14m 30s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
false
{'num_warmup_steps': 10000, 'num_training_steps': 100000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
-
-
Crashed
-
giuliano-martinelli
1h 29m 51s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
false
{'num_warmup_steps': 10000, 'num_training_steps': 100000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
-
-
Crashed
-
giuliano-martinelli
13m 22s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
false
{'num_warmup_steps': 10000, 'num_training_steps': 100000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
-
-
Crashed
-
giuliano-martinelli
25m 52s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
false
{'num_warmup_steps': 10000, 'num_training_steps': 100000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
-
-
Crashed
-
giuliano-martinelli
32m 55s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
false
{'num_warmup_steps': 10000, 'num_training_steps': 100000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
-
-
Crashed
-
giuliano-martinelli
2m 32s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
false
{'num_warmup_steps': 10000, 'num_training_steps': 100000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
-
-
Crashed
-
giuliano-martinelli
21h 34m 21s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 1500, 'num_training_steps': 15000}
-
-
{'_target_': 'models.model_spin.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'incremental_model_hidden_size': 768, 'incremental_model_num_layers': 1}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
284
-
Finished
-
giuliano-martinelli
2d 2h 48m 43s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 6000, 'num_training_steps': 80000}
-
-
{'_target_': 'models.model_s2e.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
137
-
Finished
-
giuliano-martinelli
14h 27m 23s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 1000, 'num_training_steps': 10000}
-
-
{'_target_': 'models.model_s2e.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
263
-
Finished
-
giuliano-martinelli
5h 52m 23s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 1000, 'num_training_steps': 10000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
104
-
Finished
-
giuliano-martinelli
8h 46m 8s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 1000, 'num_training_steps': 10000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
158
-
Finished
-
giuliano-martinelli
8h 48m 41s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
-
-
-
-
-
-
-
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
-
-
-
{'num_warmup_steps': 1000, 'num_training_steps': 10000}
-
-
{'_target_': 'models.model_lingmess.Model', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end'}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Adafactor
-
-
-
158
-
Failed
-
giuliano-martinelli
10m 21s
-
-
transformers.Adafactor
0.00003
-
-
false
false
0.01
-
-
-
-
-
-
60000
4000
-
models.model_spin.Model
-
-
-
-
false
microsoft/deberta-v3-small
768
1
deberta-v3-small
-
-
-
-
-
-
-
concat_start_end
-
-
-
Adafactor
-
-
-
0
-
1-20
of 394