Pszemraj's workspace
Runs
210
Name
10 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
_name_or_path
activation
adafactor
adam_beta1
adam_beta2
adam_epsilon
add_cross_attention
add_lm_hidden_dense_layer
add_token_type_embeddings
approx_mode
architectures
attention_activation
attention_probs_dropout_prob
attention_type
auto_find_batch_size
bf16
bf16_full_eval
bidirectional
block_per_row
block_size
bos_token_id
chunk_size
chunk_size_feed_forward
cls_token_id
conv_kernel_size
dataloader_drop_last
dataloader_num_workers
dataloader_persistent_workers
dataloader_pin_memory
ddp_timeout
debug
dim
disable_tqdm
diversity_penalty
do_eval
do_predict
do_sample
do_train
dropout_prob
early_stopping
ema_beta_range
ema_delta_alpha_range
ema_gamma_omega_range
ema_projection_size
Finished
-
pszemraj
1h 44s
-
pszemraj/mega-enc-MKVs-small-claude_tok
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
true
false
true
-
-
65002
1024
0
-
-
false
18
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Failed
-
pszemraj
1h 4m 51s
-
./mega-longchunk-simplepos-bert.json
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaForMaskedLM"]
softmax
0
-
false
true
false
true
-
-
101
1024
0
101
-
false
30
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Finished
-
pszemraj
cl100k-v2
3h 2s
-
BEE-spoke-data/mega-enc-MKVs-L8-v0.1-simplewiki_1k
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaForMaskedLM"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
8
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Killed
-
pszemraj
cl100k-v1
1h 5m 3s
-
BEE-spoke-data/mega-enc-MKVs-L8-v0.1-simplewiki_1k
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaForMaskedLM"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
8
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Killed
-
pszemraj
cl100k-v1
27m 44s
-
BEE-spoke-data/mega-enc-MKVs-L8-v0.1-simplewiki_1k
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaForMaskedLM"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
8
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Failed
-
pszemraj
cl100k-v1
2h 40m 58s
-
./tiktoken-mlm-longchunk-1.json
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaForMaskedLM"]
softmax
0
-
false
true
false
true
-
-
100277
1024
0
-
-
false
8
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Failed
--bf16 load bf16
pszemraj
1h 51m 8s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
16
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Failed
--bf16 load bf16
pszemraj
58m 37s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
16
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Killed
duplicate
pszemraj
12m 35s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
16
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Failed
--bf16 load fp32
pszemraj
1h 59m 3s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
16
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Failed
2.2.0 and inductor
pszemraj
1h 1m 22s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
18
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Killed
2.3 nightly
pszemraj
11m 51s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
18
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Killed
bf16 partial test
pszemraj
3m 55s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
18
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Failed
-
pszemraj
28s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
18
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Failed
-
pszemraj
27s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
18
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Failed
-
pszemraj
42s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
true
false
true
-
-
0
1024
0
-
-
false
18
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Killed
-
pszemraj
3m 30s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
false
false
true
-
-
0
1024
0
-
-
false
18
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Killed
-
pszemraj
3m 13s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
false
false
true
-
-
0
1024
0
-
-
false
18
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Killed
-
pszemraj
4m 47s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
false
false
true
-
-
0
1024
0
-
-
false
18
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
Killed
-
pszemraj
5m 12s
-
pszemraj/mega-enc-MKV-L8-simple
silu
false
0.9
0.98
1.0000e-7
false
false
true
-
["MegaModel"]
softmax
0
-
false
false
false
true
-
-
0
1024
0
-
-
false
18
false
true
1800
[]
-
false
0
true
false
false
true
0.05
false
0.02
0.2
1
32
1-20
of 39