Levmckinney's workspace
Runs
46
Name
5 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
constant
data
data.max_length
data.name
data.split
data.text_column
dist
dist.cpu_offload
dist.fsdp
loss
model
model.name
model.revision
model.slow_tokenizer
num_steps
opt
opt.lr_scale
opt.momentum
opt.optimizer
opt.weight_decay
opt.zero
output
per_gpu_batch_size
pre_ln
seed
separate_unembeddings
tokens_per_step
wandb
wandb_upload_checkpoints
bias_only
checkpoint_dir
checkpoint_freq
data.dataset_shuffle
data.dataset_shuffle_seed
data.shuffle_seed
dist.dataloader_shuffle
dist.nccl_timeout
dist.per_gpu_batch_size
model.precision
opt.warmup_steps
w_ce
w_kl
data.max_seq_len
bias_norm/0.ffn
Finished
-
levmckinney
1d 1h 48m 55s
-
-
-
-
togethercomputer/RedPajama-Data-1T-Sample
train
text
-
false
false
KL
-
meta-llama/Meta-Llama-3-8B
main
false
250
-
1
0.9
SGD
0.001
false
/home/lev/tuned_lenses/meta-llama/Meta-Llama-3-8B
-
-
42
-
262144
tuned-lens-llama3-8b-redpj
-
false
/home/lev/tuned_lenses/meta-llama/Meta-Llama-3-8B/checkpoints
30
false
42
-
true
7200
1
float32
0
-
-
2048
1.25328
Finished
-
levmckinney
1d 1h 55m 7s
-
-
-
-
togethercomputer/RedPajama-Data-1T-Sample
train
text
-
false
false
KL
-
meta-llama/Meta-Llama-3-8B-Instruct
main
false
250
-
1
0.9
SGD
0.001
false
/home/lev/tuned_lenses/meta-llama/Meta-Llama-3-8B-Instruct
-
-
42
-
262144
tuned-lens-llama3-8b-redpj
-
false
/home/lev/tuned_lenses/meta-llama/Meta-Llama-3-8B-Instruct/checkpoints
30
false
42
-
true
7200
1
float32
0
-
-
2048
1.21811
Crashed
-
levmckinney
6d 10h 54m 24s
-
-
-
2048
togethercomputer/RedPajama-Data-1T-Sample
train
text
-
false
false
LossChoice.CONTRASTIVE
-
huggyllama/llama-7b
main
true
250
-
1
0.9
OptimizerOption.SGD
0.001
false
["/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-ce-0.5-72vbr-btw7d","/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-ce-0.8-tzq5l-rhjx4","/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-ce-0.95-blcxk-f6qmk","/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-kl-0.5-g8wk2-rj7b4","/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-kl-0.8-9c7hl-vq22q","/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-kl-0.95-r8rgp-p6jlg","/root/checkpoints/tuned-lens-llama-7b-redpj-wm5g5-kw5gj"]
-
-
42
-
262144
["tuned-lens-llama-7b-redpj-contrastive-ce-0.5-72vbr-btw7d","tuned-lens-llama-7b-redpj-contrastive-ce-0.8-tzq5l-rhjx4","tuned-lens-llama-7b-redpj-contrastive-ce-0.95-blcxk-f6qmk","tuned-lens-llama-7b-redpj-contrastive-kl-0.5-g8wk2-rj7b4","tuned-lens-llama-7b-redpj-contrastive-kl-0.8-9c7hl-vq22q","tuned-lens-llama-7b-redpj-contrastive-kl-0.95-r8rgp-p6jlg","tuned-lens-llama-7b-redpj-wm5g5-kw5gj"]
-
false
["/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-ce-0.5-72vbr-btw7d/checkpoints","/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-ce-0.8-tzq5l-rhjx4/checkpoints","/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-ce-0.95-blcxk-f6qmk/checkpoints","/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-kl-0.5-g8wk2-rj7b4/checkpoints","/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-kl-0.8-9c7hl-vq22q/checkpoints","/root/checkpoints/tuned-lens-llama-7b-redpj-contrastive-kl-0.95-r8rgp-p6jlg/checkpoints","/root/checkpoints/tuned-lens-llama-7b-redpj-wm5g5-kw5gj/checkpoints"]
30
false
42
-
true
7200
1
auto
0
0.25
-0.035714
-
0.69116
Finished
-
levmckinney
1d 6h 45m 27s
-
-
-
2048
/root/datasets/pile/val.jsonl
train
text
-
false
false
LossChoice.CONTRASTIVE
-
EleutherAI/pythia-410m-deduped
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
["/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-0.01-ww76d-dmb64","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-0.1-j728w-gdhjj","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-0.5-jp9sx-8dft6","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-ce-0.1-6ndq8-fw6vk","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-ce-0.5-67nzv-7z9b4","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-ce-0.8-gqnrh-rrkwj","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-ce-1-7hc7m-mcb6h","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-kl-0.8-8ffl6-2pknm","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-kl-1-fdlmz-9c9cz"]
-
-
42
-
262144
["tuned-lens-pythia-410m-pile-contrastive-0.01-ww76d-dmb64","tuned-lens-pythia-410m-pile-contrastive-0.1-j728w-gdhjj","tuned-lens-pythia-410m-pile-contrastive-0.5-jp9sx-8dft6","tuned-lens-pythia-410m-pile-contrastive-ce-0.1-6ndq8-fw6vk","tuned-lens-pythia-410m-pile-contrastive-ce-0.5-67nzv-7z9b4","tuned-lens-pythia-410m-pile-contrastive-ce-0.8-gqnrh-rrkwj","tuned-lens-pythia-410m-pile-contrastive-ce-1-7hc7m-mcb6h","tuned-lens-pythia-410m-pile-contrastive-kl-0.8-8ffl6-2pknm","tuned-lens-pythia-410m-pile-contrastive-kl-1-fdlmz-9c9cz"]
-
false
["/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-0.01-ww76d-dmb64/checkpoints","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-0.1-j728w-gdhjj/checkpoints","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-0.5-jp9sx-8dft6/checkpoints","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-ce-0.1-6ndq8-fw6vk/checkpoints","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-ce-0.5-67nzv-7z9b4/checkpoints","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-ce-0.8-gqnrh-rrkwj/checkpoints","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-ce-1-7hc7m-mcb6h/checkpoints","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-kl-0.8-8ffl6-2pknm/checkpoints","/root/checkpoints/tuned-lens-pythia-410m-pile-contrastive-kl-1-fdlmz-9c9cz/checkpoints"]
30
false
42
-
true
7200
4
auto
0
0.28889
0.17667
-
0.39553
Finished
-
levmckinney
1d 42m 8s
-
-
-
2048
["Anthropic/hh-rlhf","togethercomputer/RedPajama-Data-1T-Sample"]
train
["chosen","text"]
-
false
false
LossChoice.KL
-
meta-llama/Llama-2-7b-chat-hf
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
["/root/checkpoints/tuned-lens-llama2-7b-chat-hh-rlhf-nwfg2-l7r6j","/root/checkpoints/tuned-lens-llama2-7b-chat-redpj-jkhtr-d75vm"]
-
-
42
-
262144
["tuned-lens-llama2-7b-chat-hh-rlhf-nwfg2-l7r6j","tuned-lens-llama2-7b-chat-redpj-jkhtr-d75vm"]
-
false
["/root/checkpoints/tuned-lens-llama2-7b-chat-hh-rlhf-nwfg2-l7r6j/checkpoints","/root/checkpoints/tuned-lens-llama2-7b-chat-redpj-jkhtr-d75vm/checkpoints"]
30
-
-
42
-
7200
1
float32
0
-
-
-
0.58243
Crashed
-
levmckinney
10d 22h 34m 59s
-
-
-
2048
togethercomputer/RedPajama-Data-1T-Sample
train
text
-
false
false
LossChoice.KL
-
meta-llama/Llama-2-7b-hf
main
false
250
-
0.83333
0.9
OptimizerOption.SGD
0.001
false
["/root/checkpoints/tuned-lens-llama-7b-7x22f-8cjqx","/root/checkpoints/tuned-lens-llama2-7b-lr-05-qwhft-bzjmw","/root/checkpoints/tuned-lens-llama2-7b-redpj-slgbl-vldl5"]
-
-
42
-
262144
["tuned-lens-llama-7b-7x22f-8cjqx","tuned-lens-llama2-7b-lr-05-qwhft-bzjmw","tuned-lens-llama2-7b-redpj-slgbl-vldl5"]
-
false
["/root/checkpoints/tuned-lens-llama-7b-7x22f-8cjqx/checkpoints","/root/checkpoints/tuned-lens-llama2-7b-lr-05-qwhft-bzjmw/checkpoints","/root/checkpoints/tuned-lens-llama2-7b-redpj-slgbl-vldl5/checkpoints"]
30
-
-
42
-
3200
1
["auto","float32"]
0
-
-
-
[null,0.6025515794754028]
Finished
-
levmckinney
17h 49m 49s
-
-
-
2048
["Anthropic/hh-rlhf","togethercomputer/RedPajama-Data-1T-Sample"]
train
["chosen","text"]
-
false
false
LossChoice.KL
-
meta-llama/Llama-2-13b-chat-hf
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
["/root/checkpoints/tuned-lens-llama2-13b-chat-hh-rlhf-5wqx2-529hz","/root/checkpoints/tuned-lens-llama2-13b-chat-redpj-vrr4f-lrww6"]
-
-
42
-
262144
["tuned-lens-llama2-13b-chat-hh-rlhf-5wqx2-529hz","tuned-lens-llama2-13b-chat-redpj-vrr4f-lrww6"]
-
false
["/root/checkpoints/tuned-lens-llama2-13b-chat-hh-rlhf-5wqx2-529hz/checkpoints","/root/checkpoints/tuned-lens-llama2-13b-chat-redpj-vrr4f-lrww6/checkpoints"]
30
-
-
42
-
7200
1
auto
0
-
-
-
0.42871
Crashed
-
levmckinney
1d 11h 17m 46s
-
-
-
2048
togethercomputer/RedPajama-Data-1T-Sample
train
text
-
false
false
LossChoice.KL
-
meta-llama/Llama-2-13b-hf
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
["/root/checkpoints/tuned-lens-llama2-13b-redpj-debug-lkg27-dmffv","/root/checkpoints/tuned-lens-llama2-13b-redpj-zzfkb-5wwkk"]
-
-
42
-
262144
["tuned-lens-llama2-13b-redpj-debug-lkg27-dmffv","tuned-lens-llama2-13b-redpj-zzfkb-5wwkk"]
-
false
["/root/checkpoints/tuned-lens-llama2-13b-redpj-debug-lkg27-dmffv/checkpoints","/root/checkpoints/tuned-lens-llama2-13b-redpj-zzfkb-5wwkk/checkpoints"]
30
-
-
42
-
7200
1
auto
0
-
-
-
0.38098
Crashed
-
levmckinney
11m 30s
-
-
-
2048
togethercomputer/RedPajama-Data-1T-Sample
train
text
-
false
false
LossChoice.KL
-
EleutherAI/pythia-70m
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
/root/checkpoints/tuned-lens-trainer-wk2g5-cfppp
-
-
42
-
262144
tuned-lens-trainer-wk2g5-cfppp
-
false
/root/checkpoints/tuned-lens-trainer-wk2g5-cfppp/checkpoints
-
-
-
-
-
1200
1
auto
0
-
-
-
0.93359
Crashed
-
levmckinney
3h 59m 25s
-
false
-
2048
/datasets/val.jsonl
validation
text
-
false
true
LossChoice.KL
-
huggyllama/llama-7b
main
true
250
-
1
0.9
OptimizerOption.SGD
0.001
false
["/output/huggyllama/llama-7b-1683479480","/output/huggyllama/llama-7b-1683483775"]
1
false
42
false
262144
["huggyllama/llama-7b-1683479480","huggyllama/llama-7b-1683483775"]
false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0.54941
Crashed
-
levmckinney
4h 12m 21s
-
false
-
2048
/datasets/val.jsonl
validation
text
-
false
true
LossChoice.KL
-
EleutherAI/pythia-2.8b-deduped-v0
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
["/output/EleutherAI/pythia-2.8b-deduped-v0-1683315633","/output/EleutherAI/pythia-2.8b-deduped-v0-1683316697"]
1
false
42
false
262144
["EleutherAI/pythia-2.8b-deduped-v0-1683315633","EleutherAI/pythia-2.8b-deduped-v0-1683316697"]
false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0.025345
Finished
-
levmckinney
5h 14m 37s
-
false
-
2048
/datasets/val.jsonl
validation
text
-
false
true
LossChoice.KL
-
EleutherAI/pythia-12b-deduped
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
/output/EleutherAI/pythia-12b-deduped-1683299166
1
false
42
false
262144
EleutherAI/pythia-12b-deduped-1683299166
false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0.015914
Finished
-
levmckinney
6h 1m 32s
-
false
-
2048
/datasets/val.jsonl
validation
text
-
false
true
LossChoice.KL
-
EleutherAI/pythia-6.9b-deduped
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
/output/EleutherAI/pythia-6.9b-deduped-1683261951
1
false
42
false
262144
EleutherAI/pythia-6.9b-deduped-1683261951
false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0.022592
Crashed
-
levmckinney
5h 1m 46s
-
false
-
2048
/datasets/val.jsonl
validation
text
-
false
true
LossChoice.KL
-
EleutherAI/pythia-2.8b-deduped
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
["/output/EleutherAI/pythia-2.8b-deduped-1683243602","/output/EleutherAI/pythia-2.8b-deduped-1683247839"]
1
false
42
false
262144
["EleutherAI/pythia-2.8b-deduped-1683243602","EleutherAI/pythia-2.8b-deduped-1683247839"]
false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0.037554
Finished
-
levmckinney
13h 40m 20s
-
false
-
2048
val.jsonl
validation
text
-
false
false
LossChoice.KL
-
EleutherAI/pythia-1.4b-deduped
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
-
1
false
42
false
262144
pythia-1.4-deduped-single-gpu
false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0.083723
Finished
-
levmckinney
8h 23m 35s
-
false
-
2048
val.jsonl
validation
text
-
false
false
LossChoice.KL
-
EleutherAI/pythia-410m-deduped
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
-
2
false
42
false
262144
pythia-160m-deduped-single-gpu
false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0.36663
Failed
-
levmckinney
11d 20h 41m 57s
-
false
Data(name=['val.jsonl'], split='validation', text_column='text', revision=None)
1280
["tests/test_data/pile_text.jsonl","val.jsonl"]
validation
text
Distributed(fsdp=False, cpu_offload=False)
false
false
["LossChoice.CE","LossChoice.KL"]
Model(name='EleutherAI/pythia-70m-deduped', revision='main', slow_tokenizer=False, tokenizer=None, tokenizer_type=None)
EleutherAI/pythia-70m-deduped
main
false
179.14286
Optimizer(weight_decay=0.001, lr_scale=1.0, momentum=0.9, zero=False, optimizer=<OptimizerOption.SGD: 'sgd'>, warmup_steps=None)
1
0.9
OptimizerOption.SGD
0.001
false
-
2.28571
false
42
false
262144
["pythia-160m-deduped-single-gpu","pythia-70m-deduped-single-gpu-no-renormalize","smoke-test-1"]
false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
0.93926
Finished
-
levmckinney
3h 38m 52s
-
false
-
2048
val.jsonl
validation
text
-
false
false
LossChoice.KL
-
EleutherAI/pythia-160m-deduped
main
false
250
-
1
0.9
OptimizerOption.SGD
0.001
false
-
2
false
42
false
262144
pythia-160m-deduped-single-gpu
false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
2.06097
1-18
of 18