Amr-amr's workspace
Runs
70
State
Notes
User
Created
Runtime
Sweep
activation_checkpointing
auxiliary_loss_multiplier
canceled_check_interval
compile.backend
compile.fullgraph
compile.mode
console_log_interval
data.drop_last
data.generate_attention_mask
data.generate_doc_lengths
data.memmap_dtype
data.num_workers
data.pad_direction
data.paths
data.persistent_workers
data.pin_memory
data.prefetch_factor
data.timeout
device_eval_batch_size
device_train_batch_size
device_train_grad_accum
device_train_microbatch_size
distributed_strategy
dry_run
eval_interval
eval_on_load
eval_subset_num_batches
evaluators
extra_steps_after_cancel
force_save_unsharded
fsdp.precision
fsdp.sharding_strategy
fsdp.use_orig_params
fsdp.wrapping_strategy
fused_loss
gen1_gc_interval
global_train_batch_size
max_duration
max_grad_norm
model.activation_type
model.alibi
model.alibi_bias_max
model.attention_dropout
model.attention_layer_norm
model.attention_layer_norm_with_affine
Crashed
-
amr-amr
5mo 6d 17h 24m 33s
-
fine_grained
0.0001
50
inductor
false
default
1
true
false
false
uint16
0
right
["/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy"]
true
true
16
0
16
32
1
32
fsdp
false
1000
false
-1
-
10
false
mixed
FULL_SHARD
true
by_block
false
1
512
262144
1
swiglu
false
8
0
false
false
Failed
-
amr-amr
4d 5h 11m 44s
-
fine_grained
0.0001
50
inductor
false
default
1
true
false
false
uint16
0
right
["/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy"]
true
true
16
0
16
32
1
32
fsdp
false
1000
false
-1
-
10
false
mixed
FULL_SHARD
true
by_block
false
1
512
262144
1
swiglu
false
8
0
false
false
Failed
-
amr-amr
1d 20h 50m 29s
-
-
0.0001
50
inductor
false
default
1
true
false
false
uint16
0
right
["/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy"]
true
true
16
0
16
32
1
32
fsdp
false
1000
false
-1
-
10
false
mixed
FULL_SHARD
true
by_block
-
1
512
262144
1
swiglu
false
8
0
false
false
Finished
-
amr-amr
23h 2m 26s
-
-
0.0001
50
inductor
false
default
1
true
false
false
uint16
0
right
["/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy"]
true
true
16
0
16
128
1
128
fsdp
false
1000
false
-1
-
10
false
mixed
FULL_SHARD
true
-
-
1
512
262144
1
swiglu
false
8
0
false
false
Finished
-
amr-amr
22h 57m 53s
-
-
0.0001
50
inductor
false
default
1
true
false
false
uint16
0
right
["/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy"]
true
true
16
0
16
64
1
64
fsdp
false
1000
false
-1
-
10
false
mixed
FULL_SHARD
true
-
-
1
512
262144
1
swiglu
false
8
0
false
false
Finished
-
amr-amr
1d 5h 49m 4s
-
-
0.0001
50
inductor
false
default
1
true
false
false
uint16
0
right
["/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy"]
true
true
16
0
16
32
0.5
48
fsdp
false
1000
false
-1
-
10
false
mixed
FULL_SHARD
true
-
-
1
512
262144
1
swiglu
false
8
0
false
false
Finished
-
amr-amr
15h 13m 50s
-
-
0.0001
50
inductor
false
default
1
true
false
false
uint16
0
right
["/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-0-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-1-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/books/gpt-neox-olmo-dolma-v1_5/part-2-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-00-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-01-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-02-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-03-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-04-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-05-00000.npy","/network/datasets/olmo.var/olmo_7b0724/olmo-mix/v1_6-decontaminated/pes2o/gpt-neox-olmo-dolma-v1_5/part-06-00000.npy"]
true
true
16
0
16
64
1
64
fsdp
false
1000
false
-1
-
10
false
mixed
FULL_SHARD
true
-
-
1
512
262144
1
swiglu
false
8
0
false
false
1-7
of 7