Zaydzuhri's workspace
Runs
123
Name
2 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
iter
lr
train/loss
val/loss
always_save
batch_size
beta1
beta2
bias
ctx_size
dataset
decay_lr
dropout
eval_interval
eval_iters
eval_only
grad_clip
gradient_accumulation_steps
init_from
log_interval
lr
lr_decay_iters
max_iters
min_lr
model_name
model_type
n_embd
n_head
n_layer
out_dir
wandb_log
wandb_project
wandb_run_name
warmup_iters
weight_decay
profile
n_params
cache_batch_size
config_type
ffn_dim_multiplier
multiple_of
n_kv_head
n_kv_layers
loss
Killed
-
zaydzuhri
15m 19s
-
600
0.00015
5.30086
5.57486
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-poetry
llama
512
8
8
out
true
fadeformer
small-llama-poetry
800
0.1
-
85074432
64
llama
1.5
512
8
-
-
Finished
-
zaydzuhri
1h 27m 6s
-
3800
0.00002
4.08465
4.39162
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-mlkv-16-poetry
llama-mlkv
512
8
8
out
true
fadeformer
small-llama-mlkv-16-poetry
800
0.1
-
85099008
64
llama
1.69
1
4
4
-
Finished
-
zaydzuhri
1h 27m 33s
-
3800
0.00002
4.04589
4.35214
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-gqa-16-poetry
llama-mlkv
512
8
8
out
true
fadeformer
small-llama-gqa-16-poetry
800
0.1
-
85099008
64
llama
1.69
1
2
8
-
Finished
-
zaydzuhri
2h 10m 28s
-
3800
0.00001
4.27565
4.57473
true
1
0.9
0.95
-
1024
poetry
true
-
200
10
false
1
24
scratch
1
0.0001
3000
4000
0.00001
little-llama-poetry
llama
640
10
10
out
true
fadeformer
little-llama-poetry
800
0.1
-
113964160
128
llama
1
64
10
-
-
Finished
-
zaydzuhri
1h 29m 43s
-
3800
0.00002
4.18533
4.53366
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-mlkv-lin-poetry
llama-mlkv
512
8
8
out
true
fadeformer
small-llama-mlkv-lin-poetry
800
0.1
-
85078528
64
llama
1.743
1
1
2
-
Finished
-
zaydzuhri
1h 39m 20s
-
3800
0.00002
4.11937
4.46512
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-mqa-poetry
llama
512
8
8
out
true
fadeformer
small-llama-mqa-poetry
800
0.1
-
85078528
64
llama
1.7195
1
1
-
-
Finished
-
zaydzuhri
1h 29m 9s
-
3800
0.00002
4.19581
4.47014
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-mlkv-poetry
llama-mlkv
512
8
8
out
true
fadeformer
small-llama-mlkv-poetry
800
0.1
-
85074432
64
llama
1.215
8
1
2
-
Finished
-
zaydzuhri
1h 25m 18s
-
3800
0.00002
4.17765
4.49129
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-mlkv-poetry
llama-mlkv
512
8
8
out
true
fadeformer
small-llama-mlkv-poetry
800
0.1
-
85041664
64
llama
1.74
8
1
2
-
Finished
-
zaydzuhri
4h 10m 45s
-
3800
0.00002
4.11539
4.4303
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-mqa-poetry
llama
512
8
8
out
true
fadeformer
small-llama-mqa-poetry
800
0.1
-
85041664
-
llama
1.715
8
1
-
-
Finished
-
zaydzuhri
1h 26m 45s
-
3800
0.00002
4.21076
4.60301
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-gkv-poetry
llama-gkv
512
8
8
out
true
fadeformer
small-llama-gkv-poetry
800
0.1
-
85091328
-
llama
1.484
4
1
-
-
Finished
-
zaydzuhri
1h 25m 47s
-
3800
0.00002
4.38479
4.69084
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-gkv-poetry
llama-gkv
512
8
8
out
true
fadeformer
small-llama-gkv-poetry
800
0.1
-
85107200
-
llama
1.235
8
1
-
-
Finished
-
zaydzuhri
1h 24m 31s
-
3800
0.00002
4.29785
4.62777
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-gkv-poetry
llama-gkv
512
8
8
out
true
fadeformer
small-llama-gkv-poetry
800
0.1
-
85107200
-
llama
1.61
8
1
-
-
Finished
-
zaydzuhri
1h 24m 21s
-
3800
0.00002
4.41628
4.71882
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-gkv-poetry
llama-gkv
512
8
8
out
true
fadeformer
small-llama-gkv-poetry
800
0.1
-
85041664
-
llama
1.62
8
8
-
-
Finished
-
zaydzuhri
1h 24m 16s
-
3800
0.00002
4.36767
4.67019
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-gkv-poetry
llama-gkv
512
8
8
out
true
fadeformer
small-llama-gkv-poetry
800
0.1
-
85008896
-
llama
1.675
8
1
-
-
Finished
-
zaydzuhri
1h 25m 58s
-
3800
0.00002
4.11539
4.4303
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-mqa-poetry
llama
512
8
8
out
true
fadeformer
small-llama-mqa-poetry
800
0.1
-
85041664
-
llama
1.715
8
1
-
-
Finished
-
zaydzuhri
1h 26m
-
3800
0.00002
4.10199
4.38269
true
4
0.9
0.95
-
1024
poetry
true
-
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-poetry
llama
512
8
8
out
true
fadeformer
small-llama-poetry
800
0.1
-
85074432
-
llama
1.5
512
8
-
-
Finished
-
zaydzuhri
56m 19s
-
3800
0.00002
4.57125
4.81766
true
4
0.9
0.95
false
1024
poetry
true
0.2
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-fadellama-attff-poetry
fadellama-attff
512
8
8
out
true
fadeformer
small-fadellama-attff-poetry
800
0.1
-
85074432
-
-
-
-
-
-
-
Finished
-
zaydzuhri
1h 18m 37s
-
3800
0.00002
4.05578
4.40016
true
8
0.9
0.95
false
512
poetry
true
0.2
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-llama-512-poetry
llama
512
8
8
out
true
fadeformer
small-llama-512-poetry
800
0.1
-
85074432
-
-
-
-
-
-
-
Crashed
-
zaydzuhri
7m 51s
-
400
0.0001
6.10602
6.1927
true
4
0.9
0.95
false
1024
poetry
true
0.2
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-fadellama-attff-lin-poetry
fadellama-attff
512
8
8
out
true
fadeformer
small-fadellama-attff-lin-poetry
800
0.1
-
85074432
-
-
-
-
-
-
-
Finished
-
zaydzuhri
55m 35s
-
3800
0.00002
4.81116
5.03176
true
4
0.9
0.95
false
1024
poetry
true
0.2
200
100
false
1
6
scratch
1
0.0002
3000
4000
0.00002
small-fadellama-attff-poetry
fadellama-attff
512
8
8
out
true
fadeformer
small-fadellama-attff-poetry
800
0.1
-
85074432
-
-
-
-
-
-
-
1-20
of 123