Akhauriyash's workspace
Runs
26
Name
26 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
checkpoint.continue_training_from_init
checkpoint.dump.every
checkpoint.dump.keep
checkpoint.eval.every
checkpoint.eval.keep
checkpoint.path
data.add_bos
data.add_eos
data.batch_size
data.load_async
data.n_views
data.prefetch_size
data.root_dir
data.seed
data.seq_len
data.sources.dclm_baseline_1.0_shuffled
data.sources.fineweb_edu_10bt_shuffled
data.tokenizer.name
data.tokenizer.path
distributed.compile
distributed.compile_cache_size_limit
distributed.detect_anomaly
distributed.dp_replicate
distributed.dp_shard
distributed.float8_filter
distributed.fsdp_type
distributed.matmul_allow_tf32
distributed.model_dtype
distributed.selective_activation_checkpointing
distributed.spawn_method
distributed.tp_size
dump_dir
env.ENABLE_INTRA_NODE_COMM
env.MKL_NUM_THREADS
env.MKL_SERVICE_FORCE_INTEL
env.NCCL_DEBUG
env.NCCL_IB_TIMEOUT
env.OMP_NUM_THREADS
env.TORCH_NCCL_ASYNC_ERROR_HANDLING
env.TORCH_NCCL_AVOID_RECORD_STREAMS
eval.generator.dtype
eval.generator.max_tokens
eval.harness.tasks
gc_collect_freq
Finished
-
akhauriyash
3h 22s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc4_L1/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc4_L1
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
3h 14m 22s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc4_L64/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc4_L64
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
3h 4m 21s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc4_L128/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc4_L128
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
2h 53m 11s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc64_L128/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc64_L128
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
2h 56m 26s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc8_L128/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc8_L128
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
2h 56m 56s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc8_L64/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc8_L64
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
2h 55m 6s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc8_L16/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc8_L16
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
3h 6m 20s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc8_L1/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc8_L1
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
2h 51m 42s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc64_L16/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc64_L16
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
2h 50m 23s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc128_L16/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc128_L16
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
2h 53m 48s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc64_L1/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc64_L1
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
3h 1m 34s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc128_L1/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc128_L1
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
3h 11m 42s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc4/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc4
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
2h 57m 49s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSM_Small_NoProjCyc/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSM_Small_NoProjCyc
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
2h 37m 30s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCycPseudo128/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCycPseudo128
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
2h 42m 6s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCycPseudo/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCycPseudo
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
5h 33m 51s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSM_Large_NoProjCyc/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSM_Large_NoProjCyc
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
2h 59m 6s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc128/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjCyc128
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
3h 42m 5s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjFAttn/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjFAttn
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
Finished
-
akhauriyash
3h 43m 47s
-
false
10000
3
5000
-1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjRand/checkpoints
true
true
16
true
2
512
/scratch/ya255/lingua/setup/data/
42
1024
100
-
sp
setup/l2tokenizer/tokenizer.model
true
8
false
1
1
layers\.[0-9]+\.
full_shard
false
bf16
false
forkserver
1
/scratch/ya255/lingua/reported_runs/AttentiveSSMNoProjRand
1
1
GNU
INFO
22
1
1
1
bf16
1024
["wikitext"]
1000
1-20
of 26