Gatti's workspace
Runs
185
Name
32 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
Adafactor
RAdam
_recursive_
lr_scheduler
model
opt
epoch
lr-Adafactor/pg1
lr-Adafactor/pg2
lr-Adafactor/pg3
lr-Adafactor/pg4
test/b_cubed_f1_score
test/b_cubed_precision
test/b_cubed_recall
test/ceafe_f1_score
test/ceafe_precision
test/ceafe_recall
test/cluster_mention_f1_score
test/cluster_mention_precision
test/cluster_mention_recall
test/conll2012_f1_score
test/conll2012_precision
test/conll2012_recall
test/coreference_loss
test/full_b_cubed_f1_score
test/full_b_cubed_precision
test/full_b_cubed_recall
test/full_ceafe_f1_score
test/full_ceafe_precision
test/full_ceafe_recall
test/full_conll2012_f1_score
test/full_conll2012_precision
test/full_conll2012_recall
test/full_loss
test/full_muc_f1_score
test/full_muc_precision
test/full_muc_recall
test/mention_f1_score
test/mention_loss
test/mention_precision
test/mention_recall
test/muc_f1_score
test/muc_precision
test/muc_recall
Failed
giuliano-martinelli
2d 20h 34m 40s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
72
0.000015136
0.000015136
0.00022703
0.00022703
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
2m 54s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
giuliano-martinelli
7d 3h 40m 45s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes'}
Adafactor
137
0
0
0
0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
2d 19h 29m 27s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
false
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
113
0.00001718
0.00001718
0.0002577
0.0002577
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
13m 29s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
-
1.8000e-7
1.8000e-7
0.0000027
0.0000027
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
8m 19s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
-
1.8000e-7
1.8000e-7
0.0000027
0.0000027
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
35m 26s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
-
1.8000e-7
1.8000e-7
0.0000027
0.0000027
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
20h 1m 13s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
false
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
38
0.000017158
0.000017158
0.00025737
0.00025737
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
1d 16m 37s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
13
0
0
0
0
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
3m 54s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
0
5.8000e-7
5.8000e-7
0.0000087
0.0000087
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
3d 3m 7s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
false
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
164
0.0000010244
0.0000010244
0.000015367
0.000015367
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
giuliano-martinelli
12s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
false
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
5d 17h 22m 23s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
false
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
105
0.0000032467
0.0000032467
0.0000487
0.0000487
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
22s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
false
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
giuliano-martinelli
50s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
false
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
3m 11s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
false
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
14s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
false
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes', 'negatives': False}
Adafactor
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
22h 38m 43s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross_n.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes'}
Adafactor
52
0.000015491
0.000015491
0.00023237
0.00023237
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
3h 25m 8s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross_old.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes'}
Adafactor
35
0.00001478
0.00001478
0.0002217
0.0002217
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Failed
giuliano-martinelli
6h 2m 25s
-
{'_target_': 'transformers.Adafactor', 'lr': 3e-05, 'weight_decay': 0.01, 'scale_parameter': False, 'relative_step': False}
{'_target_': 'torch.optim.RAdam', 'lr': 2e-05}
-
{'num_warmup_steps': 10, 'num_training_steps': 10000}
{'_target_': 'models.model_cross_n.Maverick_cross', 'language_model': 'deberta-v3-large', 'huggingface_model_name': 'microsoft/deberta-v3-large', 'freeze_encoder': False, 'span_representation': 'concat_start_end', 'cluster_representation': 'transformer', 't': 'mes'}
Adafactor
83
0.000014758
0.000014758
0.00022137
0.00022137
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
1-20
of 185