Danbraunai-apollo's workspace
Runs
169
Name
0 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
act_frequency_n_tokens
batch_size
collect_act_frequency_every_n_samples
collect_output_metrics_every_n_samples
cooldown_samples
effective_batch_size
eval_data.column_name
eval_data.dataset_name
eval_data.is_tokenized
eval_data.n_ctx
eval_data.seed
eval_data.split
eval_data.streaming
eval_data.tokenizer_name
eval_every_n_samples
eval_n_samples
log_every_n_grad_steps
loss.sparsity.coeff
lr
lr_schedule
max_grad_norm
min_lr_factor
n_samples
saes.sae_positions
save_dir
seed
tlens_model_name
train_data.column_name
train_data.dataset_name
train_data.is_tokenized
train_data.n_ctx
train_data.seed
train_data.split
train_data.streaming
train_data.tokenizer_name
wandb_project
wandb_run_name_prefix
warmup_samples
loss.logits_kl.coeff
loss.out_to_in
loss.sparsity.p_norm
saes.dict_size_to_input_ratio
saes.retrain_saes
saes.type_of_sparsifier
Finished
jordantensor
10h 14m 24s
500000
8
40000
-
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
0.5
0.001
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/jordan_tensor/e2e_sae-main/e2e_sae-main/e2e_sae/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
lr-comparison_
20000
0.5
-
-
60
-
-
Crashed
jordantensor
6h 58m 13s
500000
8
40000
-
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
1.75
0.001
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/jordan_tensor/e2e_sae-main/e2e_sae-main/e2e_sae/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
lr-comparison_
20000
0.5
-
-
60
-
-
Finished
danbraunai-apollo
6h 6m 13s
-
500000
8
40000
-
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
13
0.002
cosine
10
0.1
400000
blocks.6.hook_resid_pre
/data/dan_braun/e2e_sae/e2e_sae/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-layerwise_play
lr-comparison_
20000
-
-
-
60
-
-
Finished
danbraunai-apollo
4h 43m 27s
-
500000
8
40000
-
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
8
0.002
cosine
10
0.1
400000
blocks.6.hook_resid_pre
/data/dan_braun/e2e_sae/e2e_sae/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-layerwise_play
lr-comparison_
20000
-
-
-
60
-
-
Finished
danbraunai-apollo
4h 44m 41s
-
500000
8
40000
-
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
4
0.002
cosine
10
0.1
400000
blocks.6.hook_resid_pre
/data/dan_braun/e2e_sae/e2e_sae/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-layerwise_play
lr-comparison_
20000
-
-
-
60
-
-
Finished
danbraunai-apollo
4h 50m 15s
-
500000
8
40000
-
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
1
0.002
cosine
10
0.1
400000
blocks.6.hook_resid_pre
/data/dan_braun/e2e_sae/e2e_sae/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-layerwise_play
lr-comparison_
20000
-
-
-
60
-
-
Finished
danbraunai-apollo
11h 8m 59s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
50
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/dan_braun/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.875
-
-
60
-
-
Finished
danbraunai-apollo
11h 3m 58s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
35
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/dan_braun/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.875
-
-
60
-
-
Finished
danbraunai-apollo
11h 5m 48s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
20
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/dan_braun/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.875
-
-
60
-
-
Finished
danbraunai-apollo
11h 5m 17s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
10
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/dan_braun/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.875
-
-
60
-
-
Finished
danbraunai-apollo
11h 9m 28s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
5
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/dan_braun/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.875
-
-
60
-
-
Finished
danbraunai-apollo
15h 33m 11s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
35
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.75
-
-
60
-
-
Finished
danbraunai-apollo
15h 35m 47s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
50
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.75
-
-
60
-
-
Finished
danbraunai-apollo
15h 39m 16s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
20
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.75
-
-
60
-
-
Finished
danbraunai-apollo
15h 38m 59s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
10
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.75
-
-
60
-
-
Finished
danbraunai-apollo
15h 44m 45s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
5
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.75
-
-
60
-
-
Finished
danbraunai-apollo
15h 32m 33s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
25
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.75
-
-
60
-
-
Finished
danbraunai-apollo
15h 44m 43s
-
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
40000
500
20
15
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.75
-
-
60
-
-
Finished
jordantensor
11h 3m 2s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
100
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
recon_
20000
0.5
-
-
60
-
-
Finished
jordantensor
11h 2m 53s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
25
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
recon_
20000
0.5
-
-
60
-
-
Finished
jordantensor
11h 2m 55s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
0.5
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
recon_
20000
0.5
-
-
60
-
-
Finished
jordantensor
11h 4m 10s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
35
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
recon_
20000
0.5
-
-
60
-
-
Finished
jordantensor
11h 43s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
10
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
recon_
20000
0.5
-
-
60
-
-
Finished
jordantensor
11h 5s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
0.05
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
recon_
20000
0.5
-
-
60
-
-
Finished
jordantensor
11h 3m 27s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
1.5
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
recon_
20000
0.5
-
-
60
-
-
Finished
jordantensor
11h 29s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
50
0.0005
cosine
10
0.1
400000
blocks.10.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
recon_
20000
0.5
-
-
60
-
-
Killed
danbraunai-apollo
21m 18s
-
500000
4
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
1.5
0.001
cosine
10
0.1
400000
["blocks.6.hook_resid_pre"]
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.5
-
1
60
false
sae
Killed
danbraunai-apollo
21m 54s
-
500000
4
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
3
0.001
cosine
10
0.1
400000
["blocks.6.hook_resid_pre"]
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.5
-
1
60
false
sae
Killed
danbraunai-apollo
34m 22s
-
500000
4
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
0.5
0.001
cosine
10
0.1
400000
["blocks.6.hook_resid_pre"]
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e
20000
0.5
-
1
60
false
sae
Finished
danbraunai-apollo
3h 53m 59s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
0.2
0.0005
cosine
10
0.1
100000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
100k_
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
3h 54m 15s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
1
0.0005
cosine
10
0.1
100000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
100k_
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
3h 53m 19s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
0.5
0.0005
cosine
10
0.1
100000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
100k_
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
7h 46m 43s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
0.2
0.0005
cosine
10
0.1
200000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
200k_
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
7h 38m 29s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
1
0.0005
cosine
10
0.1
200000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
200k_
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
7h 48m 22s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
0.5
0.0005
cosine
10
0.1
200000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
200k_
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
7h 46m 43s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
0.2
0.0001
cosine
10
0.1
200000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
7h 48m 39s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
3
0.0001
cosine
10
0.1
200000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
7h 49m 45s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
1
0.0001
cosine
10
0.1
200000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
7h 46m 49s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
0.05
0.0001
cosine
10
0.1
200000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
7h 43m 35s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
1.5
0.0001
cosine
10
0.1
200000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
7h 45m 27s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
0.5
0.0001
cosine
10
0.1
200000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
20000
1
-
-
60
-
-
Finished
danbraunai-apollo
7h 50m 19s
500000
4
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
200
20
10
0.0001
cosine
10
0.1
200000
blocks.10.hook_resid_pre
/mnt/ssd-interp/dan/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
20000
1
-
-
60
-
-
Failed
jordantensor
7h 33m 23s
500000
16
40000
0
0
16
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
20000
500
20
0.005
0.0005
cosine
10
0.1
400000
blocks.6.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
-
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
-
train
true
gpt2
gpt2-e2e_play
20000
1
-
-
60
-
-
Finished
jordantensor
5h 8m 38s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.005
0.001
cosine
10
0.1
200000
blocks.11.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 8m 34s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.05
0.001
cosine
10
0.1
200000
blocks.11.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 9m 6s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.2
0.001
cosine
10
0.1
200000
blocks.11.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 8m 43s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.5
0.001
cosine
10
0.1
200000
blocks.11.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 8m 12s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
1.5
0.001
cosine
10
0.1
200000
blocks.11.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 9m 14s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
3
0.001
cosine
10
0.1
200000
blocks.11.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 8m 27s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
10
0.001
cosine
10
0.1
200000
blocks.11.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 25m 46s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.005
0.001
cosine
10
0.1
200000
blocks.9.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 25m 50s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.05
0.001
cosine
10
0.1
200000
blocks.9.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 25m 3s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.2
0.001
cosine
10
0.1
200000
blocks.9.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 24m 55s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.5
0.001
cosine
10
0.1
200000
blocks.9.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 24m 38s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
1.5
0.001
cosine
10
0.1
200000
blocks.9.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 25m 55s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
3
0.001
cosine
10
0.1
200000
blocks.9.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 24m 49s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
10
0.001
cosine
10
0.1
200000
blocks.9.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 42m 10s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.005
0.001
cosine
10
0.1
200000
blocks.7.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 42m 3s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.05
0.001
cosine
10
0.1
200000
blocks.7.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 42m 16s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.2
0.001
cosine
10
0.1
200000
blocks.7.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 41m 20s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.5
0.001
cosine
10
0.1
200000
blocks.7.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 40m 59s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
1.5
0.001
cosine
10
0.1
200000
blocks.7.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 40m 36s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
3
0.001
cosine
10
0.1
200000
blocks.7.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 40m 47s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
10
0.001
cosine
10
0.1
200000
blocks.7.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
8h 47m 17s
-
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.8
0.001
cosine
10
0.1
300000
["blocks.6.hook_resid_pre"]
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
1
60
false
sae
Finished
jordantensor
6h 16s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.005
0.001
cosine
20
0.1
200000
blocks.5.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 59m 26s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.05
0.001
cosine
20
0.1
200000
blocks.5.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 58m 27s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.2
0.001
cosine
20
0.1
200000
blocks.5.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 1s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.5
0.001
cosine
20
0.1
200000
blocks.5.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 58m 13s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
1.5
0.001
cosine
20
0.1
200000
blocks.5.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 57m 58s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
3
0.001
cosine
20
0.1
200000
blocks.5.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 58m 14s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
10
0.001
cosine
20
0.1
200000
blocks.5.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 58m 51s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
30
0.001
cosine
20
0.1
200000
blocks.5.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 16m 7s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.005
0.001
cosine
20
0.1
200000
blocks.3.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 16m 35s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.2
0.001
cosine
20
0.1
200000
blocks.3.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 15m 14s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
1.5
0.001
cosine
20
0.1
200000
blocks.3.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 15m 36s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.5
0.001
cosine
20
0.1
200000
blocks.3.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 15m 8s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
3
0.001
cosine
20
0.1
200000
blocks.3.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 14m 2s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
10
0.001
cosine
20
0.1
200000
blocks.3.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 14m 8s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
30
0.001
cosine
20
0.1
200000
blocks.3.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 30m 25s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.2
0.001
cosine
20
0.1
200000
blocks.1.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 31m 54s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.05
0.001
cosine
20
0.1
200000
blocks.1.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 31m 31s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.005
0.001
cosine
20
0.1
200000
blocks.1.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 30m 3s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.5
0.001
cosine
20
0.1
200000
blocks.1.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 30m 3s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
1.5
0.001
cosine
20
0.1
200000
blocks.1.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 31m 11s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
10
0.001
cosine
20
0.1
200000
blocks.1.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 30m 45s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
3
0.001
cosine
20
0.1
200000
blocks.1.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 30m 46s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
30
0.001
cosine
20
0.1
200000
blocks.1.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 32m 31s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.005
0.001
cosine
10
0.1
200000
blocks.8.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 32m 49s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.05
0.001
cosine
10
0.1
200000
blocks.8.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 16m 34s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.5
0.001
cosine
50
0.1
200000
blocks.10.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
-
-
-
-
-
-
Finished
jordantensor
5h 33m 11s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.2
0.001
cosine
10
0.1
200000
blocks.8.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 33m 11s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.5
0.001
cosine
10
0.1
200000
blocks.8.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 33m 27s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
1.5
0.001
cosine
10
0.1
200000
blocks.8.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 32m 4s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
3
0.001
cosine
10
0.1
200000
blocks.8.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 32m 25s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
10
0.001
cosine
10
0.1
200000
blocks.8.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
5h 16m 57s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.5
0.001
cosine
50
0.1
200000
blocks.10.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
-
-
-
-
-
-
Finished
jordantensor
6h 6m 59s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.005
0.001
cosine
10
0.1
200000
blocks.4.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 7m 22s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.05
0.001
cosine
10
0.1
200000
blocks.4.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
Finished
jordantensor
6h 6m 46s
500000
8
40000
0
0
16
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
20000
200
20
0.5
0.001
cosine
10
0.1
200000
blocks.4.hook_resid_pre
/data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
0
gpt2-small
input_ids
apollo-research/Skylion007-openwebtext-tokenizer-gpt2
true
1024
0
train
true
gpt2
gpt2-e2e
20000
1
-
-
60
-
-
1-100
of 169