Akshitab's workspace
Runs
90
State
Notes
User
Tags
Created
Runtime
Sweep
activation_checkpointing
canceled_check_interval
compile.backend
compile.fullgraph
console_log_interval
data.drop_last
data.generate_attention_mask
data.num_workers
data.pad_direction
data.paths
data.persistent_workers
data.pin_memory
data.prefetch_factor
data.timeout
device_eval_batch_size
device_train_batch_size
device_train_grad_accum
device_train_microbatch_size
dry_run
epoch
eval_interval
eval_on_load
eval_subset_num_batches
evaluators
extra_steps_after_cancel
force_save_unsharded
fsdp.precision
fsdp.sharding_strategy
fsdp.use_orig_params
fsdp.wrapping_strategy
fused_loss
gen1_gc_interval
global_train_batch_size
load_path
max_duration
max_grad_norm
model.activation_type
model.alibi
model.alibi_bias_max
model.attention_dropout
model.attention_layer_norm
model.attention_layer_norm_with_affine
model.bias_for_layer_norm
model.block_group_size
Finished
-
epwalsh
18m 6s
-
fine_grained
50
-
-
1
true
false
32
right
["s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/wiki/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy","s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy","s3://ai2-llm/preprocessed/redpajama_stackexchange_only/v1_decontaminated/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy"]
true
true
8
0
2
16
8
2
false
-
500
false
-1
[{"subset_num_batches":null,"type":"downstream","data":{"seed":null,"drop_last":false,"persistent_workers":false,"pin_memory":false,"timeout":0,"generate_attention_mask":false,"num_workers":0,"prefetch_factor":null,"pad_direction":"right","label_mask_paths":null,"paths":null,"datasets":null},"device_eval_batch_size":null,"label":"arc_challenge"},{"data":{"datasets":null,"pin_memory":false,"paths":null,"generate_attention_mask":false,"drop_last":false,"prefetch_factor":null,"timeout":0,"label_mask_paths":null,"num_workers":0,"persistent_workers":false,"pad_direction":"right","seed":null},"device_eval_batch_size":null,"label":"arc_easy","subset_num_batches":null,"type":"downstream"},{"subset_num_batches":null,"type":"downstream","data":{"drop_last":false,"num_workers":0,"pin_memory":false,"seed":null,"datasets":null,"generate_attention_mask":false,"timeout":0,"label_mask_paths":null,"paths":null,"persistent_workers":false,"prefetch_factor":null,"pad_direction":"right"},"device_eval_batch_size":null,"label":"boolq"},{"label":"hellaswag","subset_num_batches":null,"type":"downstream","data":{"label_mask_paths":null,"seed":null,"persistent_workers":false,"timeout":0,"generate_attention_mask":false,"num_workers":0,"paths":null,"pin_memory":false,"prefetch_factor":null,"datasets":null,"drop_last":false,"pad_direction":"right"},"device_eval_batch_size":null},{"data":{"drop_last":false,"generate_attention_mask":false,"pin_memory":false,"label_mask_paths":null,"prefetch_factor":null,"num_workers":0,"pad_direction":"right","paths":null,"persistent_workers":false,"datasets":null,"timeout":0,"seed":null},"device_eval_batch_size":null,"label":"mmlu_stem","subset_num_batches":null,"type":"downstream"},{"data":{"prefetch_factor":null,"timeout":0,"datasets":null,"persistent_workers":false,"generate_attention_mask":false,"pad_direction":"right","paths":null,"label_mask_paths":null,"seed":null,"drop_last":false,"num_workers":0,"pin_memory":false},"device_eval_batch_size":null,"label":"openbook_qa","subset_num_batches":null,"type":"downstream"},{"device_eval_batch_size":null,"label":"piqa","subset_num_batches":null,"type":"downstream","data":{"timeout":0,"paths":null,"num_workers":0,"drop_last":false,"label_mask_paths":null,"generate_attention_mask":false,"persistent_workers":false,"seed":null,"pin_memory":false,"pad_direction":"right","prefetch_factor":null,"datasets":null}},{"type":"downstream","data":{"paths":null,"pin_memory":false,"seed":null,"label_mask_paths":null,"pad_direction":"right","persistent_workers":false,"datasets":null,"drop_last":false,"generate_attention_mask":false,"prefetch_factor":null,"timeout":0,"num_workers":0},"device_eval_batch_size":null,"label":"sciq","subset_num_batches":null},{"data":{"datasets":null,"generate_attention_mask":false,"pin_memory":false,"num_workers":0,"paths":null,"persistent_workers":false,"label_mask_paths":null,"pad_direction":"right","prefetch_factor":null,"seed":null,"drop_last":false,"timeout":0},"device_eval_batch_size":null,"label":"winogrande","subset_num_batches":null,"type":"downstream"},{"data":{"datasets":{"c4_100_domains-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy"],"wikitext_103-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy"],"m2d2_wiki-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy"],"pile-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy"],"ptb-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy"],"gab-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy"],"ice-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy"],"m2d2_s2orc-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy"],"c4_en-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy"],"manosphere-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy"],"mc4_en-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy"],"twitterAEE-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy"],"4chan-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy"]},"persistent_workers":false,"prefetch_factor":null,"drop_last":true,"label_mask_paths":null,"num_workers":0,"paths":null,"seed":null,"generate_attention_mask":false,"timeout":0,"pad_direction":"right","pin_memory":false},"device_eval_batch_size":null,"label":"all-small-ppl-validation","subset_num_batches":null,"type":"lm"}]
10
false
mixed
ShardingStrategy.SHARD_GRAD_OP
true
by_block_and_size
true
8
1024
50e9T
1
swiglu
false
8
0
false
false
false
1
Finished
-
epwalsh
16h 15m 21s
-
fine_grained
50
inductor
false
1
true
false
20.48
right
["s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy"]
true
true
5.12
0
2
17.04
8.52
2
false
-
1000
false
-1
[{"data":{"label_mask_paths":null,"datasets":null,"drop_last":false,"paths":null,"num_workers":0,"prefetch_factor":null,"persistent_workers":false,"timeout":0,"pad_direction":"right","pin_memory":false,"generate_attention_mask":false,"seed":null},"device_eval_batch_size":null,"label":"arc_challenge","subset_num_batches":null,"type":"downstream"},{"type":"downstream","data":{"label_mask_paths":null,"paths":null,"pin_memory":false,"datasets":null,"drop_last":false,"seed":null,"persistent_workers":false,"generate_attention_mask":false,"prefetch_factor":null,"timeout":0,"num_workers":0,"pad_direction":"right"},"device_eval_batch_size":null,"label":"arc_easy","subset_num_batches":null},{"type":"downstream","data":{"pad_direction":"right","paths":null,"timeout":0,"drop_last":false,"num_workers":0,"pin_memory":false,"prefetch_factor":null,"seed":null,"datasets":null,"generate_attention_mask":false,"label_mask_paths":null,"persistent_workers":false},"device_eval_batch_size":null,"label":"boolq","subset_num_batches":null},{"label":"copa","subset_num_batches":null,"type":"downstream","data":{"timeout":0,"label_mask_paths":null,"generate_attention_mask":false,"seed":null,"datasets":null,"drop_last":false,"persistent_workers":false,"num_workers":0,"pad_direction":"right","pin_memory":false,"paths":null,"prefetch_factor":null},"device_eval_batch_size":null},{"device_eval_batch_size":null,"label":"hellaswag","subset_num_batches":null,"type":"downstream","data":{"label_mask_paths":null,"timeout":0,"num_workers":0,"pad_direction":"right","persistent_workers":false,"datasets":null,"prefetch_factor":null,"seed":null,"paths":null,"pin_memory":false,"drop_last":false,"generate_attention_mask":false}},{"device_eval_batch_size":null,"label":"openbook_qa","subset_num_batches":null,"type":"downstream","data":{"pin_memory":false,"seed":null,"paths":null,"prefetch_factor":null,"generate_attention_mask":false,"timeout":0,"persistent_workers":false,"pad_direction":"right","drop_last":false,"num_workers":0,"datasets":null,"label_mask_paths":null}},{"data":{"drop_last":false,"num_workers":0,"pad_direction":"right","persistent_workers":false,"prefetch_factor":null,"generate_attention_mask":false,"pin_memory":false,"seed":null,"datasets":null,"label_mask_paths":null,"paths":null,"timeout":0},"device_eval_batch_size":null,"label":"piqa","subset_num_batches":null,"type":"downstream"},{"data":{"num_workers":0,"pad_direction":"right","seed":null,"prefetch_factor":null,"timeout":0,"persistent_workers":false,"label_mask_paths":null,"datasets":null,"drop_last":false,"generate_attention_mask":false,"paths":null,"pin_memory":false},"device_eval_batch_size":null,"label":"sciq","subset_num_batches":null,"type":"downstream"},{"device_eval_batch_size":null,"label":"winogrande","subset_num_batches":null,"type":"downstream","data":{"drop_last":false,"num_workers":0,"prefetch_factor":null,"label_mask_paths":null,"paths":null,"pin_memory":false,"persistent_workers":false,"generate_attention_mask":false,"pad_direction":"right","timeout":0,"datasets":null,"seed":null}},{"label":"all-small-ppl-validation","subset_num_batches":null,"type":"lm","data":{"datasets":{"dolma_books-validation":["s3://ai2-llm/eval-data/perplexity/v3_small_gptneox20b/dolma_books/val/part-0-00000.npy"],"c4_en-validation":["s3://ai2-llm/eval-data/perplexity/v3_small_gptneox20b/c4_en/val/part-0-00000.npy"],"ice-validation":["s3://ai2-llm/eval-data/perplexity/v3_small_gptneox20b/ice/val/part-0-00000.npy"],"m2d2_s2orc-validation":["s3://ai2-llm/eval-data/perplexity/v3_small_gptneox20b/m2d2_s2orc/val/part-0-00000.npy"],"pile-validation":["s3://ai2-llm/eval-data/perplexity/v3_small_gptneox20b/pile/val/part-0-00000.npy"],"dolma_reddit-validation":["s3://ai2-llm/eval-data/perplexity/v3_small_gptneox20b/dolma_reddit/val/part-0-00000.npy"],"dolma_common-crawl-validation":["s3://ai2-llm/eval-data/perplexity/v3_small_gptneox20b/dolma_common-crawl/val/part-0-00000.npy"],"dolma_stack-validation":["s3://ai2-llm/eval-data/perplexity/v3_small_gptneox20b/dolma_stack/val/part-0-00000.npy"],"dolma_pes2o-validation":["s3://ai2-llm/eval-data/perplexity/v3_small_gptneox20b/dolma_pes2o/val/part-0-00000.npy"],"dolma_wiki-validation":["s3://ai2-llm/eval-data/perplexity/v3_small_gptneox20b/dolma_wiki/val/part-0-00000.npy"],"wikitext_103-validation":["s3://ai2-llm/eval-data/perplexity/v3_small_gptneox20b/wikitext_103/val/part-0-00000.npy"]},"timeout":0,"generate_attention_mask":false,"persistent_workers":false,"drop_last":true,"label_mask_paths":null,"pad_direction":"right","num_workers":0,"paths":null,"pin_memory":false,"prefetch_factor":null,"seed":null},"device_eval_batch_size":null}]
10
false
mixed
ShardingStrategy.SHARD_GRAD_OP
true
by_block_and_size
true
6.71429
1024
["s3://ai2-llm/checkpoints/OLMo-medium/mitchish7/step212000","s3://ai2-llm/checkpoints/OLMo-medium/mitchish7/step337000","s3://ai2-llm/checkpoints/OLMo-medium/mitchish7/step408923","s3://ai2-llm/checkpoints/OLMo-medium/mitchish7/step504500","s3://ai2-llm/checkpoints/OLMo-medium/mitchish7/step505550","s3://ai2-llm/checkpoints/OLMo-medium/mitchish7/step507000","s3://ai2-llm/checkpoints/OLMo-medium/mitchish7/step508500","s3://ai2-llm/checkpoints/OLMo-medium/mitchish7/step509200","s3://ai2-llm/checkpoints/OLMo-medium/mitchish7/step511350","s3://ai2-llm/checkpoints/OLMo-medium/mitchish7/step521000"]
["2ep","3e12T"]
1
swiglu
false
8
0
false
false
false
1
Finished
-
epwalsh
8h 47m 54s
-
fine_grained
50
-
-
1
true
-
0
right
["/pfs/lustref1/flash/project_462000229/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-000-00000.npy","/pfs/lustref1/flash/project_462000229/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-000-00001.npy","/pfs/lustref1/flash/project_462000229/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-001-00000.npy","/pfs/lustref1/flash/project_462000229/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-001-00001.npy","/pfs/lustref1/flash/project_462000229/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-002-00000.npy","/pfs/lustref1/flash/project_462000229/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-002-00001.npy","/pfs/lustref1/flash/project_462000229/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-003-00000.npy","/pfs/lustref1/flash/project_462000229/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-003-00001.npy","/pfs/lustref1/flash/project_462000229/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-004-00000.npy","/pfs/lustref1/flash/project_462000229/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-004-00001.npy"]
true
true
16
0
2
2
1
2
false
0
1750
false
-1
[{"device_eval_batch_size":null,"label":"arc_easy","subset_num_batches":null,"type":"downstream","data":{"drop_last":false,"prefetch_factor":null,"datasets":null,"pad_direction":"right","persistent_workers":false,"timeout":0,"paths":null,"pin_memory":false,"num_workers":0}},{"subset_num_batches":null,"type":"downstream","data":{"prefetch_factor":null,"num_workers":0,"pad_direction":"right","datasets":null,"drop_last":false,"paths":null,"pin_memory":false,"timeout":0,"persistent_workers":false},"device_eval_batch_size":null,"label":"commitment_bank"},{"device_eval_batch_size":null,"label":"copa","subset_num_batches":null,"type":"downstream","data":{"pad_direction":"right","paths":null,"drop_last":false,"prefetch_factor":null,"num_workers":0,"timeout":0,"persistent_workers":false,"pin_memory":false,"datasets":null}},{"data":{"num_workers":0,"pad_direction":"right","pin_memory":false,"datasets":null,"persistent_workers":false,"paths":null,"prefetch_factor":null,"timeout":0,"drop_last":false},"device_eval_batch_size":null,"label":"hellaswag","subset_num_batches":null,"type":"downstream"},{"label":"openbook_qa","subset_num_batches":null,"type":"downstream","data":{"datasets":null,"num_workers":0,"persistent_workers":false,"prefetch_factor":null,"pad_direction":"right","pin_memory":false,"paths":null,"drop_last":false,"timeout":0},"device_eval_batch_size":null},{"type":"downstream","data":{"datasets":null,"persistent_workers":false,"pin_memory":false,"paths":null,"timeout":0,"drop_last":false,"num_workers":0,"prefetch_factor":null,"pad_direction":"right"},"device_eval_batch_size":null,"label":"piqa","subset_num_batches":null},{"type":"downstream","data":{"pin_memory":false,"timeout":0,"prefetch_factor":null,"drop_last":false,"datasets":null,"pad_direction":"right","num_workers":0,"paths":null,"persistent_workers":false},"device_eval_batch_size":null,"label":"rte","subset_num_batches":null},{"type":"downstream","data":{"timeout":0,"num_workers":0,"pad_direction":"right","paths":null,"datasets":null,"drop_last":false,"persistent_workers":false,"pin_memory":false,"prefetch_factor":null},"device_eval_batch_size":null,"label":"sciq","subset_num_batches":null},{"type":"downstream","data":{"num_workers":0,"persistent_workers":false,"pin_memory":false,"prefetch_factor":null,"datasets":null,"timeout":0,"pad_direction":"right","paths":null,"drop_last":false},"device_eval_batch_size":null,"label":"winogrande","subset_num_batches":null},{"subset_num_batches":null,"type":"lm","data":{"datasets":{"manosphere-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy"],"mc4_en-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy"],"pile-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy"],"c4_en-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy"],"ice-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy"],"c4_100_domains-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy"],"twitterAEE-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy"],"wikitext_103-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy"],"m2d2_wiki-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy"],"4chan-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy"],"gab-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy"],"ptb-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy"],"m2d2_s2orc-validation":["/scratch/project_462000229/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy"]},"persistent_workers":false,"timeout":0,"paths":null,"drop_last":true,"num_workers":0,"prefetch_factor":null,"pad_direction":"right","pin_memory":false},"device_eval_batch_size":null,"label":"all-small-ppl-validation"}]
-
false
mixed
ShardingStrategy.FULL_SHARD
true
one_in_four
-
-
2048
["/scratch/project_462000229/checkpoints/5043439/latest","/scratch/project_462000229/checkpoints/5054254/latest","/scratch/project_462000229/checkpoints/5070211/latest","/scratch/project_462000229/checkpoints/5074623/latest","/scratch/project_462000229/checkpoints/5079547/latest","/scratch/project_462000229/checkpoints/5122842/latest","/scratch/project_462000229/checkpoints/5139412/latest","/scratch/project_462000229/checkpoints/5211152/latest","/scratch/project_462000229/checkpoints/5235002/latest","/scratch/project_462000229/checkpoints/5307103/latest"]
[476837,"2e12T"]
1
swiglu
false
8
0
false
false
false
1
Finished
-
epwalsh
8m 37s
-
-
50
inductor
false
1
true
-
16
right
["s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample-9B/gpt-neox-20b-pii-special/data.npy","s3://ai2-llm/preprocessed/tulu-v2-sft-mixture/gpt-neox-20b-pii-special/data.npy"]
true
true
1
0
4
64
16
4
false
0
100
false
-1
[{"device_eval_batch_size":null,"label":"arc_easy","subset_num_batches":null,"type":"downstream","data":{"drop_last":false,"num_workers":0,"paths":null,"prefetch_factor":null,"persistent_workers":false,"pin_memory":false,"datasets":null,"pad_direction":"right","timeout":0}},{"type":"downstream","data":{"timeout":0,"pin_memory":false,"drop_last":false,"num_workers":0,"pad_direction":"right","datasets":null,"prefetch_factor":null,"paths":null,"persistent_workers":false},"device_eval_batch_size":null,"label":"commitment_bank","subset_num_batches":null},{"device_eval_batch_size":null,"label":"copa","subset_num_batches":null,"type":"downstream","data":{"pin_memory":false,"persistent_workers":false,"prefetch_factor":null,"drop_last":false,"num_workers":0,"datasets":null,"pad_direction":"right","paths":null,"timeout":0}},{"subset_num_batches":null,"type":"downstream","data":{"timeout":0,"drop_last":false,"pad_direction":"right","num_workers":0,"paths":null,"persistent_workers":false,"prefetch_factor":null,"pin_memory":false,"datasets":null},"device_eval_batch_size":null,"label":"hellaswag"},{"subset_num_batches":null,"type":"downstream","data":{"paths":null,"num_workers":0,"persistent_workers":false,"timeout":0,"pin_memory":false,"pad_direction":"right","datasets":null,"prefetch_factor":null,"drop_last":false},"device_eval_batch_size":null,"label":"openbook_qa"},{"data":{"paths":null,"persistent_workers":false,"prefetch_factor":null,"drop_last":false,"num_workers":0,"timeout":0,"pin_memory":false,"datasets":null,"pad_direction":"right"},"device_eval_batch_size":null,"label":"piqa","subset_num_batches":null,"type":"downstream"},{"label":"rte","subset_num_batches":null,"type":"downstream","data":{"drop_last":false,"num_workers":0,"pin_memory":false,"timeout":0,"paths":null,"persistent_workers":false,"datasets":null,"prefetch_factor":null,"pad_direction":"right"},"device_eval_batch_size":null},{"data":{"pad_direction":"right","num_workers":0,"prefetch_factor":null,"datasets":null,"persistent_workers":false,"drop_last":false,"paths":null,"pin_memory":false,"timeout":0},"device_eval_batch_size":null,"label":"sciq","subset_num_batches":null,"type":"downstream"},{"type":"downstream","data":{"pad_direction":"right","paths":null,"persistent_workers":false,"drop_last":false,"timeout":0,"datasets":null,"num_workers":0,"pin_memory":false,"prefetch_factor":null},"device_eval_batch_size":null,"label":"winogrande","subset_num_batches":null},{"data":{"prefetch_factor":null,"datasets":{"4chan-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy"],"wikitext_103-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy"],"m2d2_s2orc-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy"],"c4_100_domains-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy"],"c4_en-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy"],"pile-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy"],"ptb-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy"],"twitterAEE-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy"],"gab-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy"],"ice-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy"],"manosphere-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy"],"mc4_en-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy"],"m2d2_wiki-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy"]},"num_workers":0,"pad_direction":"right","paths":null,"persistent_workers":false,"pin_memory":false,"timeout":0,"drop_last":true},"device_eval_batch_size":null,"label":"all-small-ppl-validation","subset_num_batches":null,"type":"lm"}]
-
false
mixed
ShardingStrategy.FULL_SHARD
true
by_block
-
-
2048
/net/nfs.cirrascale/allennlp/petew/checkpoints/v1_5-mix-mitch-ish/step556000-unsharded
476837
1
swiglu
false
8
0
false
false
false
1
Finished
-
epwalsh
12h 15m 39s
-
false
50
inductor
false
1
true
-
16
right
["s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-000-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-000-00001.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-001-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-001-00001.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-002-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-002-00001.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-003-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-003-00001.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-004-00000.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample/gpt-neox-20b-pii-special/part-004-00001.npy"]
true
true
1
0
2
10
5
2
false
1
1000
false
-1
[{"subset_num_batches":null,"type":"downstream","data":{"persistent_workers":false,"timeout":0,"datasets":null,"paths":null,"num_workers":0,"pin_memory":false,"pad_direction":"right","drop_last":false,"prefetch_factor":null},"device_eval_batch_size":null,"label":"arc_easy"},{"label":"commitment_bank","subset_num_batches":null,"type":"downstream","data":{"datasets":null,"drop_last":false,"num_workers":0,"timeout":0,"pin_memory":false,"prefetch_factor":null,"pad_direction":"right","paths":null,"persistent_workers":false},"device_eval_batch_size":null},{"device_eval_batch_size":null,"label":"copa","subset_num_batches":null,"type":"downstream","data":{"prefetch_factor":null,"timeout":0,"paths":null,"pin_memory":false,"drop_last":false,"num_workers":0,"persistent_workers":false,"datasets":null,"pad_direction":"right"}},{"type":"downstream","data":{"prefetch_factor":null,"datasets":null,"pad_direction":"right","pin_memory":false,"timeout":0,"num_workers":0,"paths":null,"persistent_workers":false,"drop_last":false},"device_eval_batch_size":null,"label":"hellaswag","subset_num_batches":null},{"label":"openbook_qa","subset_num_batches":null,"type":"downstream","data":{"pin_memory":false,"drop_last":false,"persistent_workers":false,"paths":null,"timeout":0,"datasets":null,"num_workers":0,"pad_direction":"right","prefetch_factor":null},"device_eval_batch_size":null},{"subset_num_batches":null,"type":"downstream","data":{"timeout":0,"datasets":null,"pad_direction":"right","num_workers":0,"paths":null,"drop_last":false,"pin_memory":false,"prefetch_factor":null,"persistent_workers":false},"device_eval_batch_size":null,"label":"piqa"},{"data":{"num_workers":0,"persistent_workers":false,"datasets":null,"drop_last":false,"paths":null,"timeout":0,"pin_memory":false,"prefetch_factor":null,"pad_direction":"right"},"device_eval_batch_size":null,"label":"rte","subset_num_batches":null,"type":"downstream"},{"type":"downstream","data":{"pad_direction":"right","datasets":null,"pin_memory":false,"prefetch_factor":null,"num_workers":0,"paths":null,"timeout":0,"persistent_workers":false,"drop_last":false},"device_eval_batch_size":null,"label":"sciq","subset_num_batches":null},{"data":{"pin_memory":false,"datasets":null,"drop_last":false,"num_workers":0,"prefetch_factor":null,"pad_direction":"right","persistent_workers":false,"timeout":0,"paths":null},"device_eval_batch_size":null,"label":"winogrande","subset_num_batches":null,"type":"downstream"},{"subset_num_batches":null,"type":"lm","data":{"datasets":{"c4_100_domains-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy"],"pile-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy"],"4chan-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy"],"twitterAEE-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy"],"c4_en-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy"],"gab-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy"],"manosphere-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy"],"mc4_en-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy"],"wikitext_103-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy"],"ice-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy"],"m2d2_s2orc-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy"],"m2d2_wiki-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy"],"ptb-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy"]},"drop_last":true,"paths":null,"num_workers":0,"pin_memory":false,"prefetch_factor":null,"pad_direction":"right","persistent_workers":false,"timeout":0},"device_eval_batch_size":null,"label":"all-small-ppl-validation"}]
-
false
mixed
ShardingStrategy.FULL_SHARD
true
by_block
-
-
2160
["s3://ai2-llm/checkpoints/7b/v1_5-mix-mitch-ish/step452000","s3://ai2-llm/checkpoints/7b/v1_5-mix-mitch-ish/step471000","s3://ai2-llm/checkpoints/7b/v1_5-mix-mitch-ish/step472000","s3://ai2-llm/checkpoints/7b/v1_5-mix-mitch-ish/step491800","s3://ai2-llm/checkpoints/7b/v1_5-mix-mitch-ish/step499000","s3://ai2-llm/checkpoints/7b/v1_5-mix-mitch-ish/step501150","s3://ai2-llm/checkpoints/7b/v1_5-mix-mitch-ish/step512000","s3://ai2-llm/checkpoints/7b/v1_5-mix-mitch-ish/step531000","s3://ai2-llm/checkpoints/7b/v1_5-mix-mitch-ish/step550000","s3://ai2-llm/checkpoints/7b/v1_5-mix-mitch-ish/step556000"]
476837
1
swiglu
false
8
0
false
false
false
1
1-5
of 5