Dan-braun's workspace
Runs
183
Name
1 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
C
batch_size
command
faithfulness_coeff
image_freq
image_on_first_step
importance_loss_coeff
importance_minimality_coeff
is_embed_unembed_recon
layerwise_stochastic_ci_masked_recon_coeff
log_ce_losses
lr
lr_schedule
lr_warmup_pct
method
metric.goal
metric.name
n_ci_mlp_neurons
n_eval_steps
n_mask_samples
output_loss_type
parameters.importance_minimality_coeffs
parameters.lrs
parameters.seeds
pnorm
pretrained_model_class
pretrained_model_path
print_freq
program
seed
steps
stochastic_ci_masked_recon_coeff
stochastic_recon_coeff
stochastic_recon_layerwise_coeff
target_module_patterns
task_config.data_generation_type
task_config.feature_probability
task_config.task_name
wandb_project
wandb_run_name_prefix
layerwise_random_recon_coeff
lp_sparsity_coeff
m
n_gate_hidden_neurons
Finished
-
dan-braun
3m 30s
-
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/bohlithe
1000
-
0
40000
-
1
1
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Killed
-
dan-braun
26s
-
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2gbkt21m
1000
-
0
40000
-
1
1
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 56s
20
4096
-
1
5000
true
-
3e-2
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 50s
20
4096
-
1
5000
true
-
1e-2
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 51s
20
4096
-
1
5000
true
-
3e-3
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 51s
20
4096
-
1
5000
true
-
1e-3
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 54s
20
4096
-
1
5000
true
-
3e-4
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 51s
20
4096
-
1
5000
true
-
1e-4
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Killed
-
dan-braun
47s
-
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
lee-goodfire
5m 1s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
4
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
lee-goodfire
5m 2s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
3
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
lee-goodfire
5m 30s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
2
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
lee-goodfire
5m 47s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
1
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
lee-goodfire
5m 47s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Killed
-
dan-braun
1m 34s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
-
dan-braun
5m
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
4
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
5m 1s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
3
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
5m 3s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
2
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
5m 4s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
1
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
5m 5s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
5m 14s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
3
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
6m 17s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
2
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 57s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
1
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 58s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 54s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
4
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 59s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
3
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 54s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
2
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
5m 1s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
1
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 57s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Killed
-
dan-braun
25s
-
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/5b5jinc7
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Failed
-
dan-braun
4s
-
-
4096
-
-
5000
true
-
-
false
-
false
0.001
cosine
0
-
-
-
-
100
-
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/wckft4gh
1000
-
0
40000
-
-
-
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
1
0.0001
200
16
Killed
-
dan-braun
20s
-
200
4096
-
1
5000
true
0.0001
-
false
1
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2gbkt21m
1000
-
0
40000
1
-
-
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Killed
-
dan-braun
44s
-
200
4096
-
1
5000
true
0.0001
-
false
1
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2gbkt21m
1000
-
0
40000
1
-
-
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Killed
-
dan-braun
17s
-
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Killed
-
dan-braun
1m 6s
-
200
4096
-
1
5000
true
0.0001
-
false
1
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2gbkt21m
1000
-
0
40000
1
-
-
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Killed
-
dan-braun
1m 27s
-
200
4096
-
1
5000
true
0.0001
-
false
1
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2gbkt21m
1000
-
0
40000
1
-
-
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Killed
-
dan-braun
1m 25s
-
200
4096
-
1
5000
true
0.0001
-
false
1
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2gbkt21m
1000
-
0
40000
1
-
-
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Failed
-
dan-braun
4s
-
200
4096
-
1
5000
true
0.0001
-
false
1
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/wckft4gh
1000
-
0
40000
1
-
-
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 57s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
5m 11s
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/5b5jinc7
1000
-
4
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
5m 10s
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/5b5jinc7
1000
-
3
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
5m 11s
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/5b5jinc7
1000
-
2
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
5m 13s
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/5b5jinc7
1000
-
1
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
5m 12s
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/5b5jinc7
1000
-
0
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
3m 44s
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2gbkt21m
1000
-
4
40000
-
1
1
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
3m 43s
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2gbkt21m
1000
-
3
40000
-
1
1
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
3m 43s
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2gbkt21m
1000
-
2
40000
-
1
1
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
3m 43s
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2gbkt21m
1000
-
1
40000
-
1
1
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
3m 43s
200
4096
-
1
5000
true
-
0.0001
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
2
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2gbkt21m
1000
-
0
40000
-
1
1
["linear1","linear2"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
Finished
-
dan-braun
4m 57s
20
4096
-
1
5000
true
-
0.003
false
-
false
0.001
cosine
0
-
-
-
16
100
1
mse
-
-
-
1
spd.experiments.tms.models.TMSModel
wandb:spd-train-tms/runs/2pmvzbbe
1000
-
4
40000
-
1
1
["linear1","linear2","hidden_layers.0"]
at_least_zero_active
0.05
tms
spd-tms
-
-
-
-
1-50
of 125