Akshitab's group workspace
Group: OLMo-7B-Tulu
Name
1 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
activation_checkpointing
canceled_check_interval
compile.backend
compile.fullgraph
console_log_interval
data.drop_last
data.generate_attention_mask
data.num_workers
data.pad_direction
data.paths
data.persistent_workers
data.pin_memory
data.prefetch_factor
data.timeout
device_eval_batch_size
device_train_batch_size
device_train_grad_accum
device_train_microbatch_size
dry_run
epoch
eval_interval
eval_on_load
eval_subset_num_batches
evaluators
extra_steps_after_cancel
force_save_unsharded
fsdp.precision
fsdp.sharding_strategy
fsdp.use_orig_params
fsdp.wrapping_strategy
fused_loss
gen1_gc_interval
global_train_batch_size
load_path
max_duration
max_grad_norm
model.activation_type
model.alibi
model.alibi_bias_max
model.attention_dropout
model.attention_layer_norm
model.attention_layer_norm_with_affine
model.bias_for_layer_norm
model.block_group_size
Finished
-
epwalsh
8m 37s
-
-
50
inductor
false
1
true
-
16
right
["s3://ai2-llm/preprocessed/tulu-v2-sft-mixture/gpt-neox-20b-pii-special/data.npy","s3://ai2-llm/preprocessed/olmo-mix/v1_5-sample-9B/gpt-neox-20b-pii-special/data.npy"]
true
true
1
0
4
64
16
4
false
0
100
false
-1
[{"data":{"datasets":{"c4_en-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_en/val.npy"],"gab-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/gab/val.npy"],"ice-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ice/val.npy"],"m2d2_wiki-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_wiki/val.npy"],"mc4_en-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/mc4_en/val.npy"],"ptb-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/ptb/val.npy"],"wikitext_103-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/wikitext_103/val.npy"],"4chan-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/4chan/val.npy"],"c4_100_domains-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/c4_100_domains/val.npy"],"m2d2_s2orc-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/m2d2_s2orc/val.npy"],"manosphere-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/manosphere/val.npy"],"pile-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/pile/val.npy"],"twitterAEE-validation":["s3://ai2-llm/eval-data/perplexity/v2_small_gptneox20b/twitterAEE/val.npy"]},"drop_last":true,"paths":null,"pin_memory":false,"prefetch_factor":null,"timeout":0,"num_workers":0,"pad_direction":"right","persistent_workers":false},"device_eval_batch_size":null,"label":"all-small-ppl-validation","subset_num_batches":null,"type":"lm"},{"device_eval_batch_size":null,"label":"piqa","subset_num_batches":null,"type":"downstream","data":{"persistent_workers":false,"pin_memory":false,"timeout":0,"drop_last":false,"num_workers":0,"prefetch_factor":null,"datasets":null,"pad_direction":"right","paths":null}},{"data":{"prefetch_factor":null,"datasets":null,"drop_last":false,"num_workers":0,"persistent_workers":false,"timeout":0,"pad_direction":"right","paths":null,"pin_memory":false},"device_eval_batch_size":null,"label":"hellaswag","subset_num_batches":null,"type":"downstream"},{"data":{"datasets":null,"drop_last":false,"num_workers":0,"paths":null,"persistent_workers":false,"pin_memory":false,"prefetch_factor":null,"pad_direction":"right","timeout":0},"device_eval_batch_size":null,"label":"winogrande","subset_num_batches":null,"type":"downstream"},{"data":{"datasets":null,"drop_last":false,"persistent_workers":false,"pin_memory":false,"prefetch_factor":null,"timeout":0,"num_workers":0,"pad_direction":"right","paths":null},"device_eval_batch_size":null,"label":"openbook_qa","subset_num_batches":null,"type":"downstream"},{"type":"downstream","data":{"timeout":0,"datasets":null,"num_workers":0,"pad_direction":"right","paths":null,"pin_memory":false,"prefetch_factor":null,"drop_last":false,"persistent_workers":false},"device_eval_batch_size":null,"label":"sciq","subset_num_batches":null},{"data":{"pad_direction":"right","pin_memory":false,"prefetch_factor":null,"timeout":0,"datasets":null,"drop_last":false,"num_workers":0,"paths":null,"persistent_workers":false},"device_eval_batch_size":null,"label":"arc_easy","subset_num_batches":null,"type":"downstream"},{"data":{"drop_last":false,"pad_direction":"right","paths":null,"persistent_workers":false,"num_workers":0,"pin_memory":false,"prefetch_factor":null,"timeout":0,"datasets":null},"device_eval_batch_size":null,"label":"copa","subset_num_batches":null,"type":"downstream"},{"data":{"paths":null,"prefetch_factor":null,"timeout":0,"persistent_workers":false,"pin_memory":false,"datasets":null,"drop_last":false,"num_workers":0,"pad_direction":"right"},"device_eval_batch_size":null,"label":"rte","subset_num_batches":null,"type":"downstream"},{"type":"downstream","data":{"paths":null,"timeout":0,"drop_last":false,"num_workers":0,"persistent_workers":false,"pin_memory":false,"prefetch_factor":null,"datasets":null,"pad_direction":"right"},"device_eval_batch_size":null,"label":"commitment_bank","subset_num_batches":null},{"data":{"num_workers":0,"paths":null,"pin_memory":false,"timeout":0,"pad_direction":"right","persistent_workers":false,"prefetch_factor":null,"datasets":null,"drop_last":false},"device_eval_batch_size":null,"label":"mrpc","subset_num_batches":null,"type":"downstream"},{"data":{"datasets":null,"drop_last":false,"num_workers":0,"pad_direction":"right","prefetch_factor":null,"timeout":0,"paths":null,"persistent_workers":false,"pin_memory":false},"device_eval_batch_size":null,"label":"sst2","subset_num_batches":null,"type":"downstream"}]
-
false
mixed
ShardingStrategy.FULL_SHARD
true
by_block
-
-
2048
/net/nfs.cirrascale/allennlp/petew/checkpoints/v1_5-mix-mitch-ish/step556000-unsharded
476837
1
swiglu
false
8
0
false
false
false
1
1-1
of 1