Pszemraj's workspace
Runs
64
Name
11 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
_block_types
_name_or_path
accelerator_config.even_batches
accelerator_config.split_batches
accelerator_config.use_seedable_sampler
adafactor
adam_beta1
adam_beta2
adam_epsilon
add_cross_attention
architectures
attention_bias
attention_dropout
attention_window_size
attn_layer_offset
attn_layer_period
auto_find_batch_size
bf16
bf16_full_eval
block_types
bos_token_id
chunk_size_feed_forward
conv1d_width
dataloader_drop_last
dataloader_num_workers
dataloader_persistent_workers
dataloader_pin_memory
ddp_timeout
debug
disable_tqdm
diversity_penalty
do_eval
do_predict
do_sample
do_train
early_stopping
embeddings_scale_by_sqrt_dim
encoder_no_repeat_ngram_size
eos_token_id
eval_delay
eval_do_concat_batches
eval_steps
evaluation_strategy
expert_layer_offset
Finished
-
pszemraj
1d 21h 50m 33s
-
-
pszemraj/jamba-900M-v0.13-KIx2
true
false
true
false
0.9
0.999
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
32
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
400
steps
1
Killed
-
pszemraj
37m 14s
-
-
pszemraj/jamba-900M-v0.13-KIx2
true
false
true
false
0.9
0.999
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
32
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
400
steps
1
Finished
-
pszemraj
9h 41m 28s
-
-
pszemraj/jamba-H1024_L12-v0.12-fineweb-100k-xlong_16k
true
false
true
false
0.9
0.999
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
14
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
200
steps
1
Finished
-
pszemraj
6h 12m 54s
-
-
pszemraj/jamba-H1024_L12-v0.11-code_tb_fiction
true
false
true
false
0.9
0.995
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
14
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
200
steps
1
Killed
-
pszemraj
2m 58s
-
-
pszemraj/jamba-H1024_L12-v0.11-code_tb_fiction
true
false
true
false
0.9
0.995
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
14
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
400
steps
1
Killed
no compile
pszemraj
14m 51s
-
-
pszemraj/jamba-H1024_L12-v0.11-code_tb_fiction
true
false
true
false
0.9
0.995
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
8
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
200
steps
1
Finished
-
pszemraj
16h 9m 44s
-
-
pszemraj/jamba-H1024_L12-v0.10-fineweb-200k_long
true
false
true
false
0.9
0.995
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
16
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
400
steps
1
Finished
-
pszemraj
12h 1m 55s
-
-
pszemraj/jamba-H1024_L12-v0.09-fineweb-1M_longish
true
false
true
false
0.9
0.999
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
32
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
400
steps
1
Finished
-
pszemraj
1d 1h 30m 56s
-
-
pszemraj/jamba-H1024_L12-v0.08-wikiedu_mix
true
false
true
false
0.9
0.999
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
20
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
400
steps
1
Killed
-
pszemraj
1m 54s
-
-
pszemraj/jamba-H1024_L12-v0.08-wikiedu_mix
true
false
true
false
0.9
0.999
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
14
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
400
steps
1
Finished
-
pszemraj
1d 13h 17m 51s
-
["recurrent","recurrent","attention"]
pszemraj/griffin-1024-llama3t-8layer-simplewiki-silu
true
false
true
false
0.9
0.99
1.0000e-7
false
["RecurrentGemmaForCausalLM"]
false
0
2048
-
-
false
false
false
["recurrent","recurrent","attention"]
128000
0
4
false
64
false
true
1800
[]
false
0
true
false
false
true
false
true
0
128001
0
true
400
steps
-
Finished
-
pszemraj
3d 7h 25m 40s
-
-
pszemraj/jamba-H1024_L12-v0.07-fineweb-1M-med
true
false
true
false
0.9
0.999
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
32
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
400
steps
1
Finished
-
pszemraj
1d 11h 34m 39s
-
["recurrent","recurrent","attention"]
pszemraj/griffin-v0.01-c3t-8layer-simplewiki-silu
true
false
true
false
0.9
0.99
1.0000e-7
false
["RecurrentGemmaForCausalLM"]
false
0
2048
-
-
false
false
false
["recurrent","recurrent","attention"]
0
0
4
false
96
false
true
1800
[]
false
0
true
false
false
true
false
true
0
0
0
true
400
steps
-
Killed
-
pszemraj
25m 30s
-
["recurrent","recurrent","attention"]
pszemraj/griffin-v0.01-c3t-8layer-simplewiki-silu
true
false
true
false
0.9
0.99
1.0000e-7
false
["RecurrentGemmaForCausalLM"]
false
0
2048
-
-
false
false
false
["recurrent","recurrent","attention"]
0
0
4
false
16
false
true
1800
[]
false
0
true
false
false
true
false
true
0
0
0
true
400
steps
-
Finished
-
pszemraj
5h 16m 12s
-
["recurrent","recurrent","attention"]
griffin-1024-llama3t-8layer
true
false
true
false
0.9
0.99
1.0000e-7
false
["RecurrentGemmaForCausalLM"]
false
0
2048
-
-
false
false
false
["recurrent","recurrent","attention"]
128000
0
4
false
16
false
true
1800
[]
false
0
true
false
false
true
false
true
0
128001
0
true
100
steps
-
Finished
-
pszemraj
10h 25m 8s
-
["recurrent","recurrent","attention"]
silu-griffin-1024-c3t
true
false
true
false
0.9
0.99
1.0000e-7
false
["RecurrentGemmaForCausalLM"]
false
0
2048
-
-
false
false
false
["recurrent","recurrent","attention"]
0
0
4
false
32
false
true
1800
[]
false
0
true
false
false
true
false
true
0
0
0
true
100
steps
-
Finished
-
pszemraj
4h 47m 4s
-
["recurrent","recurrent","attention"]
silu-griffin-1024-c3t-8layer-16h
true
false
true
false
0.9
0.99
1.0000e-7
false
["RecurrentGemmaForCausalLM"]
false
0
2048
-
-
false
false
false
["recurrent","recurrent","attention"]
0
0
4
false
16
false
true
1800
[]
false
0
true
false
false
true
false
true
0
0
0
true
100
steps
-
Killed
is worse
pszemraj
4h 40m 49s
-
["recurrent","recurrent","attention"]
silu-griffin-1024-c3t-8layer-noscale
true
false
true
false
0.9
0.99
1.0000e-7
false
["RecurrentGemmaForCausalLM"]
false
0
2048
-
-
false
false
false
["recurrent","recurrent","attention"]
0
0
4
false
16
false
true
1800
[]
false
0
true
false
false
true
false
false
0
0
0
true
100
steps
-
Killed
-
pszemraj
18h 48m 13s
-
-
pszemraj/jamba-H1024_L12-v0.06-dolma-500k
true
false
true
false
0.9
0.999
1.0000e-8
false
["JambaForCausalLM"]
-
0
-
2
4
false
true
false
-
0
0
-
false
6
false
true
1800
[]
false
0
true
false
false
true
false
-
0
0
0
true
400
steps
1
Failed
-
pszemraj
9h 21m 40s
-
["recurrent","recurrent","attention"]
./griffin-1024-c3t.json
true
false
true
false
0.9
0.99
1.0000e-7
false
["RecurrentGemmaForCausalLM"]
false
0
2048
-
-
false
false
false
["recurrent","recurrent","attention"]
0
0
4
false
32
false
true
1800
[]
false
0
true
false
false
true
false
true
0
0
0
true
100
steps
-
1-20
of 48