Jnainani's workspace
Runs
35
Name
35 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
base_save_dir
cache_dir
crosscoder.dec_init_norm
crosscoder.k
crosscoder.n_latents
crosscoder.use_decoder_bias
crosscoder.use_encoder_bias
cuda_device
data.activations_harvester.cache_mode
data.activations_harvester.harvesting_batch_size
data.activations_harvester.inference_dtype
data.activations_harvester.llms
data.n_tokens_for_norm_estimate
data.token_sequence_loader.hf_dataset_name
data.token_sequence_loader.sequence_length
experiment_name
hookpoints
seed
train.batch_size
train.dead_latents_threshold_n_examples
train.gradient_accumulation_steps_per_batch
train.k_aux
train.lambda_aux
train.log_every_n_steps
train.num_steps
train.optimizer.betas
train.optimizer.learning_rate
train.optimizer.type
train.optimizer.warmdown_pct
train.optimizer.warmup_pct
train.save_every_n_steps
train.topk_style
train.upload_saves_to_wandb
wandb.entity
wandb.mode
wandb.project
train/aux_loss
train/epoch
train/fvu/model0/hookpointblocks.12.hook_resid_post
train/fvu/model0/hookpointblocks.13.hook_resid_post
train/fvu/model0/hookpointblocks.14.hook_resid_post
train/fvu/model0/hookpointblocks.15.hook_resid_post
train/fvu/model0/hookpointblocks.16.hook_resid_post
train/fvu/model0/hookpointblocks.17.hook_resid_post
Finished
tim_hua
2h 14m 17s
-
.checkpoints
.cache
0.01
80
16384
true
true
0
no_cache
8
float32
[{"hf_model_name":null,"base_archicteture_name":null,"name":"google/gemma-2-2b","revision":null}]
100000
HuggingFaceFW/fineweb
256
real_wild2_hyperparameters_2025-04-13_20-11-53
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
10101
1024
500000
1
512
0.0625
10
30000
[0.9,0.999]
0.0001
adam
0.05
0.05
10000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
1470.68713
0
0.32291
0.32999
0.34541
-
-
-
Crashed
tim_hua
2h 45m 31s
-
.checkpoints
.cache
0.01
80
16384
true
true
0
no_cache
8
float32
[{"revision":null,"hf_model_name":null,"base_archicteture_name":null,"name":"google/gemma-2-2b"}]
100000
HuggingFaceFW/fineweb
256
wild2_hyperparameters_2025-04-13_20-03-04
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
10101
1024
500000
1
512
0.0625
10
30000
[0.9,0.999]
0.0001
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
3174.51636
0
0.67766
0.75186
0.77748
-
-
-
Failed
tim_hua
1s
-
.checkpoints
.cache
0.01
128
16384
true
true
0
no_cache
8
float32
[{"revision":null,"hf_model_name":null,"base_archicteture_name":null,"name":"google/gemma-2-2b"}]
100000
HuggingFaceFW/fineweb
256
wild2_hyperparameters_2025-04-13_20-00-15
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
10101
1024
500000
1
512
0.0625
10
30000
[0.9,0.999]
0.0001
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
-
-
-
-
-
-
-
-
Finished
tim_hua
2h 24m 6s
-
.checkpoints
.cache
0.01
64
16384
true
true
0
no_cache
16
float32
[{"revision":null,"hf_model_name":null,"base_archicteture_name":null,"name":"google/gemma-2-2b"}]
100000
HuggingFaceFW/fineweb
128
wild_hyperparameters_2025-04-13_12-41-55
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
1024
100000
4
512
0.0625
10
30000
[0.9,0.999]
0.0003
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
1480.60138
0
0.28873
0.31864
0.34475
-
-
-
Killed
tim_hua
8m 24s
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
8
float32
[{"revision":null,"hf_model_name":null,"base_archicteture_name":null,"name":"google/gemma-2-2b"}]
100000
HuggingFaceFW/fineweb
512
real2_added_activations_buffer_2025-04-13_12-31-09
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
256
10000
1
512
0.1
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
1679.1167
0
0.4261
0.42255
0.43397
-
-
-
Failed
tim_hua
50s
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
8
float32
[{"hf_model_name":null,"base_archicteture_name":null,"name":"google/gemma-2-2b","revision":null}]
100000
HuggingFaceFW/fineweb
512
real2_added_activations_buffer_2025-04-13_12-22-41
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
256
10000
1
512
0.1
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
-
-
-
-
-
-
-
-
Failed
tim_hua
56s
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
8
float32
[{"name":"google/gemma-2-2b","revision":null,"hf_model_name":null,"base_archicteture_name":null}]
100000
HuggingFaceFW/fineweb
512
real_added_activations_buffer_2025-04-13_12-13-06
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
256
10000
1
512
0.1
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
-
-
-
-
-
-
-
-
Crashed
tim_hua
21m
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
8
float32
[{"name":"google/gemma-2-2b","revision":null,"hf_model_name":null,"base_archicteture_name":null}]
100000
HuggingFaceFW/fineweb
512
debugrun_2025-04-13_11-44-35
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
256
10000
1
512
0.1
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
-
-
-
-
-
-
-
-
Killed
tim_hua
11m 56s
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
8
float32
[{"name":"google/gemma-2-2b","revision":null,"hf_model_name":null,"base_archicteture_name":null}]
100000
HuggingFaceFW/fineweb
512
btopk_bos_no_grad_clip_2025-04-13_11-32-01
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
256
10000
1
512
0.1
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
1703.95776
0
0.43855
0.44996
0.45727
-
-
-
Killed
tim_hua
24m 56s
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
8
float32
[{"base_archicteture_name":null,"name":"google/gemma-2-2b","revision":null,"hf_model_name":null}]
100000
HuggingFaceFW/fineweb
512
btopk_right_nos_2025-04-13_11-06-41
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
256
10000
1
512
0.1
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
1719.61218
0
0.43214
0.43585
0.4511
-
-
-
Crashed
tim_hua
3m 15s
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
8
float32
[{"name":"google/gemma-2-2b","revision":null,"hf_model_name":null,"base_archicteture_name":null}]
100000
HuggingFaceFW/fineweb
512
topk_back2fw_2025-04-13_11-01-25
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
128
10000
1
-
0.5
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
-
-
-
-
-
-
-
-
Crashed
tim_hua
15m
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
1
float32
[{"name":"google/gemma-2-2b","revision":null,"hf_model_name":null,"base_archicteture_name":null}]
100000
HuggingFaceFW/fineweb
512
topk_back2fw_2025-04-11_12-00-13
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
128
10000
1
-
0.5
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
-
-
-
-
-
-
-
-
Crashed
tim_hua
32m
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
1
float32
[{"revision":null,"hf_model_name":null,"base_archicteture_name":null,"name":"google/gemma-2-2b"}]
100000
HuggingFaceFW/fineweb
512
topk_back2fw_2025-04-11_11-24-10
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
128
10000
1
-
0.5
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
-
-
-
-
-
-
-
-
Killed
tim_hua
5m 40s
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
1
float32
[{"revision":null,"hf_model_name":null,"base_archicteture_name":null,"name":"google/gemma-2-2b"}]
100000
HuggingFaceFW/fineweb
512
topk_back2fw_2025-04-11_11-15-38
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
128
10000
1
-
0.5
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
3825.68652
0
0.90065
0.9503
0.92771
-
-
-
Killed
tim_hua
22m 14s
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
1
float32
[{"name":"google/gemma-2-2b","revision":null,"hf_model_name":null,"base_archicteture_name":null}]
100000
tiiuae/falcon-refinedweb
512
new_dataset2_2025-04-10_11-21-06
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
128
10000
1
-
0.5
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
1163.81982
0
0.41224
0.37585
0.40058
-
-
-
Failed
tim_hua
26s
-
.checkpoints
.cache
0.01
200
20000
true
true
0
no_cache
1
float32
[{"revision":null,"hf_model_name":null,"base_archicteture_name":null,"name":"google/gemma-2-2b"}]
100000
tiiuae/falcon-refinedweb
512
new_dataset_2025-04-10_11-07-12
["blocks.12.hook_resid_post","blocks.13.hook_resid_post","blocks.14.hook_resid_post"]
42
128
10000
1
-
0.5
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
-
-
-
-
-
-
-
-
Killed
tim_hua
2m 25s
-
.checkpoints
.cache
0.01
125
20000
true
true
0
no_cache
1
float32
[{"base_archicteture_name":null,"name":"google/gemma-2-2b","revision":null,"hf_model_name":null}]
100000
HuggingFaceFW/fineweb
512
lower_lr_2025-04-10_09-49-48
["blocks.16.hook_resid_post","blocks.17.hook_resid_post","blocks.18.hook_resid_post"]
42
128
5000
1
-
0.5
10
30000
[0.9,0.999]
0.000005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
5657.80664
0
-
-
-
-
1.5032
1.63148
Killed
tim_hua
5m 30s
-
.checkpoints
.cache
0.01
125
20000
true
true
0
no_cache
1
float32
[{"name":"google/gemma-2-2b","revision":null,"hf_model_name":null,"base_archicteture_name":null}]
100000
HuggingFaceFW/fineweb
512
lower_lr_2025-04-10_09-40-54
["blocks.16.hook_resid_post","blocks.17.hook_resid_post","blocks.18.hook_resid_post"]
42
128
5000
1
-
0.5
10
30000
[0.9,0.999]
0.000005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
3201.66772
0
-
-
-
-
1.13924
1.12855
Finished
tim_hua
43m 27s
-
.checkpoints
.cache
0.01
125
20000
true
true
0
no_cache
1
float32
[{"name":"google/gemma-2-2b","revision":null,"hf_model_name":null,"base_archicteture_name":null}]
100000
HuggingFaceFW/fineweb
512
small_init_norm_big_lambda_2025-04-10_00-11-14
["blocks.16.hook_resid_post","blocks.17.hook_resid_post","blocks.18.hook_resid_post"]
42
128
10000
1
-
1
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
3131.49219
0
-
-
-
-
0.66136
0.64201
Killed
tim_hua
3m 57s
-
.checkpoints
.cache
0.01
125
20000
true
true
0
no_cache
1
float32
[{"hf_model_name":null,"base_archicteture_name":null,"name":"google/gemma-2-2b","revision":null}]
100000
HuggingFaceFW/fineweb
512
small_init_norm_2025-04-09_23-14-44
["blocks.16.hook_resid_post","blocks.17.hook_resid_post","blocks.18.hook_resid_post"]
42
128
10000
1
-
0.1
10
30000
[0.9,0.999]
0.00005
adam
0.05
0.05
3000
batch_topk
false
jnainani-university-of-massachusetts-amherst
online
acausal_cc_tests
2932.63477
0
-
-
-
-
0.89273
0.89721
1-20
of 35