Banyan's workspace
Runs
61
Name
7 visualized
State
Notes
User
Created
Runtime
Sweep
TORCH_VERSION_AT_LEAST_2_5
always_save_checkpoint
batch_size
beta1
beta2
compile
decay_lr
device
dim
dropout
dtype
eval_interval
eval_iters
eval_only
grad_clip
gradient_accumulation_steps
init_from
learning_rate
local_rank
log_interval
max_iters
max_seq_len
multiple_of
n_heads
n_kv_heads
n_layers
out_dir
vocab_size
vocab_source
wandb_log
wandb_project
wandb_run_name
warmup_iters
weight_decay
iter
loss/train
loss/val
lr
mfu
tokens
Finished
-
banyan
7m 6s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_07_17_04_07_15
1000
0.1
0
10.57759
10.58124
0
-100
0
Finished
-
banyan
54s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_07_17_03_48_13
1000
0.1
0
10.57754
10.57729
0
-100
0
Crashed
-
banyan
2m 1s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
32
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_07_17_03_37_17
1000
0.1
-
-
-
-
-
-
Crashed
-
banyan
2m 46s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
32
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_07_17_03_32_49
1000
0.1
-
-
-
-
-
-
Failed
-
banyan
2m 27s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_07_17_03_29_36
1000
0.1
0
10.57754
10.57729
0
-100
0
Failed
-
banyan
4m 4s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_07_09_18_55_39
1000
0.1
0
10.57754
10.57729
0
-100
0
Crashed
-
banyan
1m
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
32
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_07_09_14_24_58
1000
0.1
-
-
-
-
-
-
Crashed
-
banyan
1m 1s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
32
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_07_09_14_23_25
1000
0.1
-
-
-
-
-
-
Crashed
-
banyan
1m 16s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_07_09_14_16_08
1000
0.1
-
-
-
-
-
-
Crashed
-
banyan
11h 27m 30s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_05_24_05_18_16
1000
0.1
10000
2.71542
2.66657
0.00099496
80.3064
5242880000
Crashed
-
banyan
17m 46s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_05_24_04_54_53
1000
0.1
-
-
-
-
-
-
Crashed
-
banyan
2m 16s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_05_24_04_52_13
1000
0.1
-
-
-
-
-
-
Finished
-
banyan
11m 49s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_05_23_21_44_24
1000
0.1
0
10.57431
10.57594
0
-100
0
Finished
-
banyan
7m 10s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_05_23_21_36_40
1000
0.1
0
10.57431
10.57594
0
-100
0
Crashed
-
banyan
2m
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_05_23_21_28_56
1000
0.1
-
-
-
-
-
-
Crashed
-
banyan
46s
-
true
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_05_23_21_20_25
1000
0.1
-
-
-
-
-
-
Finished
-
banyan
14s
-
-
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_05_23_20_30_00
1000
0.1
-
-
-
-
-
-
Finished
-
banyan
11m 22s
-
-
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_05_23_19_30_02
1000
0.1
0
10.57431
10.57594
0
-100
0
Crashed
-
banyan
1s
-
-
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
64
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_05_23_19_15_00
1000
0.1
-
-
-
-
-
-
Crashed
-
banyan
1s
-
-
true
8
0.9
0.95
true
true
cuda
1024
0.1
bfloat16
2000
100
false
1
16
scratch
0.001
-
100
200000
1024
32
32
32
32
out
32000
llama2
true
llamac
run2025_05_23_18_25_31
1000
0.1
-
-
-
-
-
-
1-20
of 61