Zachdata's workspace
Runs
13
Name
5 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
activation_dim
autocast_dtype
checkpoint_steps
ctx_len
decay_start
device
dict_class
dict_size
dict_size_multiple
dtype
estimated_sparsity
eval_batch_size
eval_n_batches
experiment_type
gradient_clip_norm
group_fractions
group_weights
hook_name
k
kl_coeff
kl_warmup_steps
layer
lm_name
log_steps
log_var_init
lr
model_name
model_type
n_ctxs
out_batch_size
refresh_batch_size
save_dir
seed
sparsity_warmup_steps
steps
submodule_name
topk_mode
total_steps
trainer_class
use_april_update_mode
use_batch_topk
use_wandb
var_flag
wandb_entity
Crashed
-
zachdata
1d 4h 54m
-
2048
bfloat16
[60000]
128
48000
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0079346
24
6
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
65
1
1500
0
gelu-1l
1000
-2
0.00007
gelu-1l
gelu-1l
2500
192
12
./experiments
42
3000
60000
blocks.0.mlp.hook_post
magnitude
60000
MatryoshkaVSAEIsoTrainer
true
true
true
1
zachdata
Finished
-
zachdata
12m 53s
-
2048
bfloat16
[60000]
128
48000
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0079346
24
6
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
65
1
1500
0
gelu-1l
1000
-2
0.00007
gelu-1l
gelu-1l
2500
192
12
./experiments
42
3000
60000
blocks.0.mlp.hook_post
magnitude
60000
MatryoshkaVSAEIsoTrainer
true
true
true
1
zachdata
Crashed
-
zachdata
1d 5h 10m 44s
-
2048
bfloat16
[60000]
128
48000
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0079346
24
6
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
65
1
1500
0
gelu-1l
1000
-2
0.007
gelu-1l
gelu-1l
2500
192
12
./experiments
42
3000
60000
blocks.0.mlp.hook_post
magnitude
60000
MatryoshkaVSAEIsoTrainer
true
true
true
1
zachdata
Finished
-
zachdata
9m 1s
-
2048
bfloat16
[60000]
128
48000
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0079346
24
6
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
65
1
1500
0
gelu-1l
1000
-2
0.001
gelu-1l
gelu-1l
2500
192
12
./experiments
42
3000
60000
blocks.0.mlp.hook_post
magnitude
60000
MatryoshkaVSAEIsoTrainer
true
true
true
0
zachdata
Finished
-
zachdata
8m 52s
-
2048
bfloat16
[60000]
128
48000
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0079346
24
6
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
65
0.01
1500
0
gelu-1l
1000
-2
0.0007
gelu-1l
gelu-1l
2500
192
12
./experiments
42
3000
60000
blocks.0.mlp.hook_post
magnitude
60000
MatryoshkaVSAEIsoTrainer
true
true
true
0
zachdata
Finished
-
zachdata
8m 52s
-
2048
bfloat16
[60000]
128
48000
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0079346
24
6
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
65
1
1500
0
gelu-1l
1000
-2
0.0007
gelu-1l
gelu-1l
2500
192
12
./experiments
42
3000
60000
blocks.0.mlp.hook_post
magnitude
60000
MatryoshkaVSAEIsoTrainer
true
true
true
0
zachdata
Finished
-
zachdata
8m 59s
-
2048
bfloat16
[60000]
128
48000
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0079346
24
6
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
65
30
1500
0
gelu-1l
1000
-2
0.0007
gelu-1l
gelu-1l
2500
192
12
./experiments
42
3000
60000
blocks.0.mlp.hook_post
magnitude
60000
MatryoshkaVSAEIsoTrainer
true
true
true
0
zachdata
Finished
-
zachdata
8m 51s
-
2048
bfloat16
[60000]
128
48000
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0079346
24
6
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
65
100
1500
0
gelu-1l
1000
-2
0.0007
gelu-1l
gelu-1l
2500
192
12
./experiments
42
3000
60000
blocks.0.mlp.hook_post
magnitude
60000
MatryoshkaVSAEIsoTrainer
true
true
true
0
zachdata
Finished
-
zachdata
1s
-
2048
bfloat16
[]
128
800
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0039063
32
3
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
32
500
100
0
gelu-1l
50
-2
0.0005
gelu-1l
gelu-1l
500
128
16
./experiments
42
50
1000
blocks.0.mlp.hook_post
magnitude
1000
MatryoshkaVSAEIsoTrainer
true
true
true
0
zachdata
Finished
-
zachdata
1s
-
2048
bfloat16
[]
128
800
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0039063
32
3
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
32
500
100
0
gelu-1l
50
-2
0.0005
gelu-1l
gelu-1l
500
128
16
./experiments
42
50
1000
blocks.0.mlp.hook_post
magnitude
1000
MatryoshkaVSAEIsoTrainer
true
true
true
0
zachdata
Finished
-
zachdata
1s
-
2048
bfloat16
[]
128
800
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0039063
32
3
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
32
500
100
0
gelu-1l
50
-2
0.0005
gelu-1l
gelu-1l
500
128
16
./experiments
42
50
1000
blocks.0.mlp.hook_post
magnitude
1000
MatryoshkaVSAEIsoTrainer
true
true
true
0
zachdata
Finished
-
zachdata
1s
-
2048
bfloat16
[]
128
800
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0039063
32
3
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
32
500
100
0
gelu-1l
50
-2
0.0005
gelu-1l
gelu-1l
500
128
16
./experiments
42
50
1000
blocks.0.mlp.hook_post
magnitude
1000
MatryoshkaVSAEIsoTrainer
true
true
true
0
zachdata
Finished
-
zachdata
9m 10s
-
2048
bfloat16
[60000]
128
48000
cuda
MatryoshkaVSAEIso
8192
4
bfloat16
0.0079346
24
6
matryoshka_vsae_batch_topk
1
[0.25,0.25,0.25,0.25]
[0.4,0.3,0.2,0.1]
blocks.0.mlp.hook_post
65
40
1500
0
gelu-1l
1000
-2
0.0007
gelu-1l
gelu-1l
2500
192
12
./experiments
42
3000
60000
blocks.0.mlp.hook_post
magnitude
60000
MatryoshkaVSAEIsoTrainer
true
true
true
0
zachdata
1-13
of 13