Pszemraj's workspace
Runs
15
Name
5 visualized
State
Created
Tags
Notes
Runtime
Crashed
dataload issue
1h 36m 16s
Failed
-
24s
Finished
polar_express
16h 35m 12s
Killed
newton_schulz
13m 14s
Killed
speed test (train)
7m 23s
Killed
speed test (train)
56m 8s
Killed
-
36m 21s
Finished
byte-fallback
-
13h 5m 44s
Finished
byte-fallback
-
13h 59m 12s
Finished
byte-fallback
-
13h 45m 36s
Finished
unoptimized tokenizer
13h 50m 14s
Finished
uncased
-
13h 37m 41s
Finished
-
17h 44m 41s
Crashed
has dropout
-
10h 59m 37s
Finished
has dropout
uncased
-
15h 5m 26s
Commit
GitHub
model.vocab_size
_raw_model_dict.dropout_prob
_raw_model_dict.hidden_size
_raw_model_dict.num_attention_heads
_raw_model_dict.num_hidden_layers
dataset.num_proc
dataset.num_workers
model.dropout_prob
model.hidden_size
model.num_attention_heads
model.num_hidden_layers
optimizer.muon_config.clipping_alpha
optimizer.muon_config.clipping_threshold
optimizer.muon_config.clipping_warmup_steps
optimizer.muon_config.detect_anomalies
optimizer.muon_config.enable_clipping
optimizer.muon_config.enable_profiling
optimizer.muon_config.log_dir
optimizer.muon_config.log_interval
optimizer.muon_config.log_max_logits
optimizer.muon_config.monitor_attention_entropy
optimizer.muon_config.muon_beta
optimizer.muon_config.muon_decay
optimizer.muon_config.ns_steps
optimizer.muon_config.offload_hooks_to_cpu
optimizer.name
tokenizer.max_length
tokenizer.name
tokenizer.vocab_size
trainer.early_stopping
trainer.eval_batch_size
trainer.eval_strategy
trainer.gradient_accumulation_steps
trainer.gradient_checkpointing
trainer.greater_is_better
trainer.load_best_model_at_end
trainer.mixed_precision
trainer.num_train_epochs
trainer.per_device_eval_batch_size
trainer.per_device_train_batch_size
trainer.save_total_limit
trainer.train_batch_size
optimizer.muon_config.orthogonalization
train/accuracy
train/batches
train/grad_norm
train/learning_rate
train/local_num_correct
train/local_num_pred
b1eb3733713b2e141d8847704fba33ae4f581c17
32000
0
768
12
12
8
4
0
768
12
12
0.5
50
0
false
true
-
-
-
-
-
0.95
0.01
5
-
muonclip
1024
BEE-spoke-data/wordpiece-tokenizer-32k-en_code-msp
32000
0
-
steps
4
false
true
false
bf16
3
16
16
3
-
polar_express
0.50664
32000
1.40909
0.0000998
381678
753351
b1eb3733713b2e141d8847704fba33ae4f581c17
32000
0
768
12
12
8
4
0
768
12
12
0.5
50
0
false
true
-
-
-
-
-
0.95
0.01
5
-
muonclip
1024
BEE-spoke-data/wordpiece-tokenizer-32k-en_code-msp
32000
0
-
steps
4
false
true
false
bf16
3
16
16
3
-
polar_express
-
-
-
-
-
-
93f197989dbe70401601359ef7aebba9acfb63f4
32000
0
768
12
12
8
4
0
768
12
12
0.5
50
0
false
true
-
-
-
-
-
0.95
0
5
-
muonclip
1024
BEE-spoke-data/wordpiece-tokenizer-32k-en_code-msp
32000
0
-
steps
4
false
true
false
bf16
3
16
16
3
-
polar_express
0.66402
400000
1.156
0.00001
502157
756235
5dedcaac1be40f17a26ddd3fc4bc67152ada52bf
32000
0
768
12
12
8
4
0
768
12
12
0.5
50
0
false
true
-
-
-
-
-
0.95
0
5
-
muonclip
1024
BEE-spoke-data/wordpiece-tokenizer-32k-en_code-msp
32000
0
-
steps
4
false
true
false
bf16
3
16
16
3
-
-
0.1286
4800
3.43388
0.000024008
98432
765394
b42bad8bc042981c81e12183b89cb115c723047d
32000
0
768
12
12
8
4
0
768
12
12
-
-
-
-
-
-
-
-
-
-
-
-
-
-
adamw
1024
BEE-spoke-data/wordpiece-tokenizer-32k-en_code-msp
32000
0
-
steps
4
false
true
false
bf16
3
16
16
3
-
-
0.10915
3200
2.71046
0.000016008
83532
765277
e22182d99a3eee30f4b33556a61e1f6f1705c766
32000
0
768
12
12
8
4
0
768
12
12
0.5
50
0
false
true
false
./logs/muonclip
100
false
false
0.95
0
5
false
muonclip
1024
BEE-spoke-data/wordpiece-tokenizer-32k-en_code-msp
32000
0
-
steps
4
false
true
false
bf16
3
16
16
3
-
-
0.31311
12400
2.5014
0.000062004
239451
764755
6a8eba59dd590787ff87ec66698f46b94846fa82
128128
0
512
16
16
8
4
0
512
16
16
-
-
-
-
-
-
-
-
-
-
-
-
-
-
adamw
4096
microsoft/deberta-v3-base
128128
0
-
steps
8
true
true
false
bf16
3
8
8
1
-
-
0.2806
20000
1
0.000050005
201559
718303
a349e22bd050cb3b9c19ec607cd0b1068f32e1dd
24064
0
768
12
12
8
4
0
768
12
12
-
-
-
-
-
-
-
-
-
-
-
-
-
-
adamw
4096
pszemraj/bytebpe-tokenizer-24k-en_code-mlm
24064
0
-
steps
4
false
true
false
bf16
3
16
16
1
-
-
0.67391
400000
1.6807
0.00001
519208
770442
a349e22bd050cb3b9c19ec607cd0b1068f32e1dd
32128
0
768
12
12
8
4
0
768
12
12
-
-
-
-
-
-
-
-
-
-
-
-
-
-
adamw
4096
pszemraj/bytebpe-tokenizer-32k-mlm-uncased
32128
0
-
steps
4
false
true
false
bf16
3
16
16
1
-
-
0.65046
400000
1.82954
0.00001
482197
741320
a349e22bd050cb3b9c19ec607cd0b1068f32e1dd
32128
0
768
12
12
8
4
0
768
12
12
-
-
-
-
-
-
-
-
-
-
-
-
-
-
adamw
4096
pszemraj/bytebpe-tokenizer-32k-mlm
32128
0
-
steps
4
false
true
false
bf16
3
16
16
1
-
-
0.66444
400000
1.69592
0.00001
503668
758029
a349e22bd050cb3b9c19ec607cd0b1068f32e1dd
32128
0
768
12
12
8
4
0
768
12
12
-
-
-
-
-
-
-
-
-
-
-
-
-
-
adamw
4096
pszemraj/BPE-tokenizer-32k-mlm-mBERT_ExProse
32128
0
-
steps
4
false
true
false
bf16
3
16
16
3
-
-
0.66284
400000
1.7503
0.00001
504301
760818
a349e22bd050cb3b9c19ec607cd0b1068f32e1dd
30592
0
768
12
12
8
4
0
768
12
12
-
-
-
-
-
-
-
-
-
-
-
-
-
-
adamw
4096
chandar-lab/NeoBERT
30592
0
-
steps
4
false
true
false
bf16
3
16
16
3
-
-
0.63741
400000
1.79237
0.00001
459999
721666
8a0dadfa10ea23f7338993e194b36400c73c673a
50368
0
768
12
12
4
2
0
768
12
12
-
-
-
-
-
-
-
-
-
-
-
-
-
-
adamw
4096
answerdotai/ModernBERT-base
50368
0
-
steps
4
true
true
false
bf16
3
16
16
3
-
-
0.64338
400000
1
0.00001
479892
745891
dd1edd9f8fca5f87645c93444d5d814fb5f8fd73
31999
0.05
768
12
12
8
4
0.05
768
12
12
-
-
-
-
-
-
-
-
-
-
-
-
-
-
adamw
1024
BEE-spoke-data/wordpiece-tokenizer-32k-en_code-msp
31999
0
-
steps
4
false
true
false
bf16
3
16
16
3
-
-
0.64034
289200
1.44409
0.000031729
486403
759601
f759ae1ce3feee156eb41f8d2b89db336da1771c
31999
0.05
768
12
12
8
4
0.05
768
12
12
-
-
-
-
-
-
-
-
-
-
-
-
-
-
adamw
1024
BEE-spoke-data/wordpiece-tokenizer-32k-en_code-msp
31999
-
32
-
4
false
-
-
no
-
16
16
-
16
-
0.65316
400000
1.53944
0.00001
494898
757702
1-15
of 15