Pszemraj's workspace
Runs 
49
Name
18 visualized
train/global_step
train/loss
train/epoch
345
1.0578
2.99756
345
0.8844
2.99756
115
2.7137
0.99919
-
-
-
345
1.1307
2.99674
345
0.861
2.99674
345
1.0167
2.99674
460
2.4757
3.99566
115
4.8971
0.99891
460
1.0608
3.99566
460
0.9359
3.99566
115
1.9528
0.99891
575
0.5205
4.99457
490
0.5268
4.25624
575
1.0246
4.99457
575
0.527
4.99457
575
1.9799
4.99457
575
1.1762
4.99457
575
2.0859
4.99457
575
2.0855
4.99457
State
Notes
User
Tags
Created
Runtime
Sweep
_name_or_path
accelerator_config.even_batches
accelerator_config.split_batches
accelerator_config.use_seedable_sampler
adafactor
adam_beta1
adam_beta2
adam_epsilon
add_cross_attention
architectures
auto_find_batch_size
bf16
bf16_full_eval
chunk_size_feed_forward
classifier_dropout
d_ff
d_kv
d_model
data_seed
dataloader_drop_last
dataloader_num_workers
dataloader_persistent_workers
dataloader_pin_memory
ddp_timeout
debug
decoder_start_token_id
dense_act_fn
disable_tqdm
diversity_penalty
do_eval
do_predict
do_sample
do_train
dropout_rate
early_stopping
encoder_no_repeat_ngram_size
eos_token_id
eval_delay
eval_steps
evaluation_strategy
feed_forward_proj
fp16
fp16_backend
fp16_full_eval
Finished
-
pszemraj
1h 2m 21s
-
google/flan-t5-large
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2816
64
1024
16919
false
24
false
true
1800
[]
0
gelu_new
false
0
true
true
false
true
0.1
false
0
1
0
-
epoch
gated-gelu
false
auto
false
Finished
-
pszemraj
1h 3m 21s
-
pszemraj/tFINE-900m-e16-d32-7m_instruct-L1
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
3072
64
1024
16919
false
24
false
true
1800
[]
3
silu
false
0
true
true
false
true
0
false
0
2
0
-
epoch
gated-silu
false
auto
false
Failed
-
pszemraj
25m 45s
-
google/t5-v1_1-large
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2816
64
1024
16919
false
24
false
true
1800
[]
0
gelu_new
false
0
true
true
false
true
0.1
false
0
1
0
-
epoch
gated-gelu
false
auto
false
Failed
-
pszemraj
7m 35s
-
google/t5-v1_1-large
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2816
64
1024
16919
false
24
false
true
1800
[]
0
gelu_new
false
0
true
true
false
true
0.1
false
0
1
0
-
epoch
gated-gelu
false
auto
false
Finished
-
pszemraj
42m 49s
-
pszemraj/tFINE-900m-e16-d32-SNI
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
3072
64
1024
16919
false
24
false
true
1800
[]
3
silu
false
0
true
true
false
true
0
false
0
2
0
-
epoch
gated-silu
false
auto
false
Finished
-
pszemraj
41m 30s
-
BEE-spoke-data/tFINE-900m-e16-d32-flan
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
3072
64
1024
16919
false
24
false
true
1800
[]
3
silu
false
0
true
true
false
true
0
false
0
2
0
-
epoch
gated-silu
false
auto
false
Finished
-
pszemraj
47m 20s
-
pszemraj/tFINE-900m-e16-d32-1024ctx
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
3072
64
1024
16919
false
24
false
true
1800
[]
3
silu
false
0
true
true
false
true
0
false
0
2
0
-
epoch
gated-silu
false
auto
false
Failed
-
pszemraj
31m 34s
-
google/t5-v1_1-base
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2048
64
768
16919
false
32
false
true
1800
[]
0
gelu_new
false
0
true
true
false
true
0.1
false
0
1
0
-
epoch
gated-gelu
false
auto
false
Failed
-
pszemraj
8m 48s
-
google/t5-v1_1-base
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2048
64
768
16919
false
32
false
true
1800
[]
0
gelu_new
false
0
true
true
false
true
0.1
false
0
1
0
-
epoch
gated-gelu
false
auto
false
Finished
-
pszemraj
31m 29s
-
pszemraj/tFINE-base-300m-SNI
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2560
64
768
16919
false
32
false
true
1800
[]
3
silu
false
0
true
true
false
true
0
false
0
2
0
-
epoch
gated-silu
false
auto
false
Finished
-
pszemraj
29m 40s
-
pszemraj/tFINE-base-300m
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2560
64
768
16919
false
32
false
true
1800
[]
3
silu
false
0
true
true
false
true
0
false
0
2
0
-
epoch
gated-silu
false
auto
false
Failed
-
pszemraj
7m 17s
-
pszemraj/tFINE-base-300m
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2560
64
768
16919
false
32
false
true
1800
[]
3
silu
false
0
true
true
false
true
0
false
0
2
0
-
epoch
gated-silu
false
auto
false
Finished
-
pszemraj
23m 23s
-
pszemraj/tFINE-base-65kBPE
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2560
64
768
16919
false
14
false
true
1800
[]
65000
silu
false
0
true
true
false
true
0
false
0
65001
0
-
epoch
gated-silu
false
auto
false
Killed
-
pszemraj
20m 4s
-
pszemraj/tFINE-base-65kBPE
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2560
64
768
16919
false
14
false
true
1800
[]
65000
silu
false
0
true
true
false
true
0
false
0
65001
0
-
epoch
gated-silu
false
auto
false
Finished
-
pszemraj
41m 2s
-
pszemraj/nanoT5-mid-65kBPE-2k-openhermes
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
3072
64
1024
16919
false
14
false
true
1800
[]
65000
silu
false
0
true
true
false
true
0
false
0
65001
0
-
epoch
gated-silu
false
auto
false
Finished
-
pszemraj
21m 48s
-
pszemraj/nano-Tfine-base
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2048
64
768
16919
false
14
false
true
1800
[]
0
gelu_new
false
0
true
true
false
true
0
false
0
1
0
-
epoch
gated-gelu
false
auto
false
Finished
-
pszemraj
22m 30s
-
pszemraj/nanoT5-base-65kBPE-v2
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2048
64
768
16919
false
8
false
true
1800
[]
65000
silu
false
0
true
true
false
true
0
false
0
65001
0
-
epoch
gated-silu
false
auto
false
Finished
-
pszemraj
39m
-
pszemraj/nanoT5-mid-65kBPE-2048
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
3072
64
1024
16919
false
8
false
true
1800
[]
65000
silu
false
0
true
true
false
true
0
false
0
65001
0
-
epoch
gated-silu
false
auto
false
Finished
-
pszemraj
25m 9s
-
google/t5-v1_1-base
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2048
64
768
16919
false
8
false
true
1800
[]
0
gelu_new
false
0
true
true
false
true
0.1
false
0
1
0
-
epoch
gated-gelu
false
auto
false
Finished
-
pszemraj
27m 1s
-
pszemraj/nanoT5-base-65kSP
true
false
true
false
0.9
0.999
1.0000e-8
false
["T5ForConditionalGeneration"]
false
true
false
0
0
2048
64
768
16919
false
8
false
true
1800
[]
0
silu
false
0
true
true
false
true
0.1
false
0
65536
0
-
epoch
gated-silu
false
auto
false
1-20
of 48