Awni00's workspace
Runs
8,360
Name
0 visualized
train_size: 25000
train_size: 25000
5
train_size: 22500
train_size: 22500
5
train_size: 20000
train_size: 20000
5
train_size: 17500
train_size: 17500
5
train_size: 15000
train_size: 15000
5
train_size: 12500
train_size: 12500
5
train_size: 10000
train_size: 10000
5
train_size: 7500
train_size: 7500
5
train_size: 5000
train_size: 5000
5
train_size: 2500
train_size: 2500
5
State
Notes
User
Tags
Created
Runtime
Sweep
activation
bias
d_model
dff
dropout_rate
group
image_shape
n_heads
n_heads_rca
n_heads_sa
n_layers
norm_first
num_classes
num_params
patch_size
pool
rca_kwargs.symmetric_rels
rca_kwargs.use_relative_positional_symbols
rca_type
symbol_retrieval
symbol_retrieval_kwargs.max_length
symbol_retrieval_kwargs.max_rel_pos
symbol_retrieval_kwargs.symbol_dim
task
train_size
val_size
attn_kwargs.symmetric_attn
n_heads_ra
ra_kwargs.n_relations
ra_kwargs.rel_activation
ra_kwargs.rel_proj_dim
ra_kwargs.symmetric_attn
ra_kwargs.symmetric_rels
ra_type
sa_kwargs.symmetric_attn
test/acc_hexos
test/acc_in_distribution
test/acc_stripes
test/loss_hexos
test/loss_in_distribution
test/loss_stripes
Finished
awni00
37m 27s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=0; ra=8; nr=8; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
0
2
true
2
390498
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
13750
5000
-
8
8
identity
16
true
true
relational_attention
true
0.74848
0.76565
0.73354
0.48647
0.44655
0.53067
Finished
awni00
35m 38s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=0; ra=4; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
0
2
true
2
389474
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
13750
5000
-
4
4
identity
32
true
true
relational_attention
true
0.68622
0.70296
0.66145
0.57452
0.53847
0.63932
Finished
awni00
34m 49s
-
swiglu
false
128
256
0.1
1task_between__sa=0; ra=8; nr=8; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=pos
25
-
-
0
2
true
2
390498
12
mean
-
-
-
positional_symbols
10
-
128
1task_between
1375
5000
-
8
8
identity
16
true
true
relational_attention
true
0.74619
0.76521
0.71806
0.62762
0.53373
0.80628
Finished
awni00
33m 46s
-
swiglu
false
128
256
0.1
1task_between__sa=0; ra=4; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=pos
25
-
-
0
2
true
2
389474
12
mean
-
-
-
positional_symbols
10
-
128
1task_between
1375
5000
-
4
4
identity
32
true
true
relational_attention
true
0.76452
0.7831
0.72635
0.57992
0.48807
0.7912
Finished
awni00
33m 37s
-
swiglu
false
128
256
0.1
xoccurs__sa=0; ra=8; nr=8; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positiona
25
-
-
0
2
true
2
390498
12
mean
-
-
-
positional_symbols
10
-
128
xoccurs
1375
5000
-
8
8
identity
16
true
true
relational_attention
true
0.67707
0.69647
0.64422
0.59347
0.53594
0.72866
Finished
awni00
34m 42s
-
swiglu
false
128
256
0.1
xoccurs__sa=0; ra=4; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positiona
25
-
-
0
2
true
2
389474
12
mean
-
-
-
positional_symbols
10
-
128
xoccurs
1375
5000
-
4
4
identity
32
true
true
relational_attention
true
0.7104
0.7333
0.66832
0.56378
0.48893
0.76603
Finished
awni00
33m 7s
-
swiglu
false
128
256
0.1
occurs__sa=0; ra=8; nr=8; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional
25
-
-
0
2
true
2
390498
12
mean
-
-
-
positional_symbols
10
-
128
occurs
1375
5000
-
8
8
identity
16
true
true
relational_attention
true
0.75976
0.77669
0.69097
0.45519
0.39116
0.73875
Finished
awni00
33m 15s
-
swiglu
false
128
256
0.1
occurs__sa=0; ra=4; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional
25
-
-
0
2
true
2
389474
12
mean
-
-
-
positional_symbols
10
-
128
occurs
1375
5000
-
4
4
identity
32
true
true
relational_attention
true
0.78188
0.80084
0.70346
0.46397
0.38858
0.82144
Finished
awni00
33m 38s
-
swiglu
false
128
256
0.1
same__sa=0; ra=8; nr=8; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_s
25
-
-
0
2
true
2
390498
12
mean
-
-
-
positional_symbols
10
-
128
same
1375
5000
-
8
8
identity
16
true
true
relational_attention
true
0.68279
0.70826
0.66787
0.77068
0.65523
0.87849
Finished
awni00
33m 45s
-
swiglu
false
128
256
0.1
same__sa=0; ra=4; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_s
25
-
-
0
2
true
2
389474
12
mean
-
-
-
positional_symbols
10
-
128
same
1375
5000
-
4
4
identity
32
true
true
relational_attention
true
0.69386
0.71653
0.6632
0.77018
0.65017
0.9638
Finished
awni00
32m 41s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
13750
5000
-
2
4
identity
16
true
true
relational_attention
true
0.59577
0.60909
0.57994
0.71792
0.70422
0.74497
Finished
awni00
19m 40s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
25000
5000
-
2
4
identity
16
true
true
relational_attention
true
0.90359
0.93358
0.85679
0.28237
0.17881
0.4035
Finished
awni00
20m 6s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
22500
5000
-
2
4
identity
16
true
true
relational_attention
true
0.74326
0.76456
0.70805
0.49404
0.43374
0.58679
Finished
awni00
20m 28s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
20000
5000
-
2
4
identity
16
true
true
relational_attention
true
0.70369
0.72028
0.66833
0.55031
0.50418
0.61529
Finished
awni00
20m 58s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
17500
5000
-
2
4
identity
16
true
true
relational_attention
true
0.55285
0.56722
0.53422
0.73179
0.72286
0.75229
Finished
awni00
21m 36s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
15000
5000
-
2
4
identity
16
true
true
relational_attention
true
0.51369
0.5318
0.51343
0.78202
0.7894
0.75848
Finished
awni00
22m 1s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
12500
5000
-
2
4
identity
16
true
true
relational_attention
true
0.51927
0.53178
0.50995
0.79125
0.80896
0.81275
Finished
awni00
1m 7s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
[3,36,36]
-
-
2
2
true
2
356194
[12,12]
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
12500
5000
-
2
4
identity
16
true
true
relational_attention
true
0.52245
0.5342
0.5152
0.77775
0.7889
0.85921
Finished
awni00
1m 32s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
[3,36,36]
-
-
2
2
true
2
356194
[12,12]
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
12500
5000
-
2
4
identity
16
true
true
relational_attention
true
0.5549
0.5806
0.5215
0.77472
0.77375
0.82257
Finished
awni00
1m 33s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
[3,36,36]
-
-
2
2
true
2
356194
[12,12]
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
12500
5000
-
2
4
identity
16
true
true
relational_attention
true
0.5045
0.5103
0.4985
0.78947
0.81443
0.80858
Finished
awni00
1m 35s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
[3,36,36]
-
-
2
2
true
2
356194
[12,12]
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
12500
5000
-
2
4
identity
16
true
true
relational_attention
true
0.50405
0.5173
0.50585
0.82166
0.83836
0.81951
Finished
awni00
1m 34s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
[3,36,36]
-
-
2
2
true
2
356194
[12,12]
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
12500
5000
-
2
4
identity
16
true
true
relational_attention
true
0.51045
0.5165
0.5087
0.79267
0.82937
0.75387
Finished
awni00
22m 22s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
10000
5000
-
2
4
identity
16
true
true
relational_attention
true
0.50909
0.51834
0.50321
0.83007
0.84315
0.80776
Finished
awni00
22m 45s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
7500
5000
-
2
4
identity
16
true
true
relational_attention
true
0.50678
0.5128
0.50408
0.89475
0.89204
0.92786
Finished
awni00
22m 38s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
5000
5000
-
2
4
identity
16
true
true
relational_attention
true
0.50472
0.5079
0.50256
0.95944
0.9839
0.94849
Finished
awni00
23m 33s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
2500
5000
-
2
4
identity
16
true
true
relational_attention
true
0.50081
0.5026
0.49883
0.86314
0.88515
0.8365
Finished
awni00
35m 57s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=4; ra=4; nr=8; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
4
2
true
2
356706
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
13750
5000
-
4
8
identity
8
true
true
relational_attention
true
0.65277
0.66723
0.6385
0.64299
0.61868
0.68362
Finished
awni00
29m 43s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=1; ra=1; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
1
2
true
2
355938
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
13750
5000
-
1
2
identity
32
true
true
relational_attention
true
0.61642
0.6313
0.59761
0.67521
0.65405
0.71377
Finished
awni00
30m 53s
-
swiglu
false
128
256
0.1
1task_match_patt__sa=0; ra=2; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=
25
-
-
0
2
true
2
388962
12
mean
-
-
-
positional_symbols
10
-
128
1task_match_patt
13750
5000
-
2
2
identity
64
true
true
relational_attention
true
0.63542
0.65182
0.61196
0.6255
0.59621
0.68215
Finished
awni00
26m 33s
-
swiglu
false
128
256
0.1
1task_between__sa=4; ra=4; nr=8; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=pos
25
-
-
4
2
true
2
356706
12
mean
-
-
-
positional_symbols
10
-
128
1task_between
1375
5000
-
4
8
identity
8
true
true
relational_attention
true
0.7386
0.75748
0.7038
0.62405
0.52228
0.8611
Finished
awni00
26m 10s
-
swiglu
false
128
256
0.1
1task_between__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=pos
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
1task_between
1375
5000
-
2
4
identity
16
true
true
relational_attention
true
0.75721
0.77651
0.72232
0.58778
0.4885
0.78494
Finished
awni00
25m 24s
-
swiglu
false
128
256
0.1
1task_between__sa=1; ra=1; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=pos
25
-
-
1
2
true
2
355938
12
mean
-
-
-
positional_symbols
10
-
128
1task_between
1375
5000
-
1
2
identity
32
true
true
relational_attention
true
0.76093
0.78263
0.71152
0.61419
0.49817
0.92015
Finished
awni00
25m 32s
-
swiglu
false
128
256
0.1
1task_between__sa=0; ra=2; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=pos
25
-
-
0
2
true
2
388962
12
mean
-
-
-
positional_symbols
10
-
128
1task_between
1375
5000
-
2
2
identity
64
true
true
relational_attention
true
0.77473
0.79508
0.72584
0.54107
0.43981
0.79369
Finished
awni00
1h 7m 32s
-
swiglu
false
128
256
0.1
xoccurs__sa=4; ra=4; nr=8; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
4
2
true
2
356706
12
mean
-
-
-
positional_symbols
10
-
128
xoccurs
1375
5000
-
4
8
identity
8
true
true
relational_attention
true
0.6648
0.68312
0.62962
0.62402
0.56498
0.77842
Finished
awni00
1h 13m 18s
-
swiglu
false
128
256
0.1
xoccurs__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
xoccurs
1375
5000
-
2
4
identity
16
true
true
relational_attention
true
0.68922
0.70906
0.64157
0.60175
0.5346
0.83788
Finished
awni00
1h 12m 53s
-
swiglu
false
128
256
0.1
xoccurs__sa=1; ra=1; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
1
2
true
2
355938
12
mean
-
-
-
positional_symbols
10
-
128
xoccurs
1375
5000
-
1
2
identity
32
true
true
relational_attention
true
0.71659
0.74426
0.66392
0.60551
0.50885
0.85111
Finished
awni00
1h 9m 46s
-
swiglu
false
128
256
0.1
xoccurs__sa=0; ra=2; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
0
2
true
2
388962
12
mean
-
-
-
positional_symbols
10
-
128
xoccurs
1375
5000
-
2
2
identity
64
true
true
relational_attention
true
0.71752
0.74269
0.66394
0.56122
0.4751
0.81971
Finished
awni00
1h 10m 15s
-
swiglu
false
128
256
0.1
occurs__sa=4; ra=4; nr=8; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
4
2
true
2
356706
12
mean
-
-
-
positional_symbols
10
-
128
occurs
1375
5000
-
4
8
identity
8
true
true
relational_attention
true
0.76133
0.77888
0.67608
0.46288
0.39429
0.82766
Finished
awni00
1h 5m 56s
-
swiglu
false
128
256
0.1
occurs__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
occurs
1375
5000
-
2
4
identity
16
true
true
relational_attention
true
0.76956
0.78736
0.68511
0.46809
0.40236
0.83939
Finished
awni00
1h 5m 25s
-
swiglu
false
128
256
0.1
same__sa=4; ra=4; nr=8; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
4
2
true
2
356706
12
mean
-
-
-
positional_symbols
10
-
128
same
1375
5000
-
4
8
identity
8
true
true
relational_attention
true
0.68905
0.71063
0.65535
0.83573
0.70678
1.08543
Finished
awni00
1h 7m 41s
-
swiglu
false
128
256
0.1
occurs__sa=1; ra=1; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
1
2
true
2
355938
12
mean
-
-
-
positional_symbols
10
-
128
occurs
1375
5000
-
1
2
identity
32
true
true
relational_attention
true
0.78929
0.81469
0.67655
0.47284
0.37367
0.9649
Finished
awni00
1h 7m 56s
-
swiglu
false
128
256
0.1
occurs__sa=0; ra=2; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
0
2
true
2
388962
12
mean
-
-
-
positional_symbols
10
-
128
occurs
1375
5000
-
2
2
identity
64
true
true
relational_attention
true
0.791
0.81677
0.67694
0.47294
0.37127
1.01755
Finished
awni00
1h 6m 15s
-
swiglu
false
128
256
0.1
same__sa=2; ra=2; nr=4; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
2
2
true
2
356194
12
mean
-
-
-
positional_symbols
10
-
128
same
1375
5000
-
2
4
identity
16
true
true
relational_attention
true
0.71037
0.73524
0.66815
0.74001
0.6103
0.97674
Finished
awni00
1h 5m 39s
-
swiglu
false
128
256
0.1
same__sa=1; ra=1; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
1
2
true
2
355938
12
mean
-
-
-
positional_symbols
10
-
128
same
1375
5000
-
1
2
identity
32
true
true
relational_attention
true
0.7278
0.75389
0.67159
0.7679
0.62622
1.07676
Finished
awni00
1h 5m 25s
-
swiglu
false
128
256
0.1
same__sa=0; ra=2; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; sym_attn=True; symbol_type=positional_symbols
25
-
-
0
2
true
2
388962
12
mean
-
-
-
positional_symbols
10
-
128
same
1375
5000
-
2
2
identity
64
true
true
relational_attention
true
0.73234
0.75781
0.67279
0.66262
0.54551
0.95934
Finished
awni00
47m 31s
-
swiglu
false
128
256
0.1
1task_between__sa=1; ra=1; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; symbol_type=positional_symbols
25
-
-
1
2
true
2
372322
12
mean
-
-
-
positional_symbols
10
-
128
1task_between
1375
5000
-
1
2
identity
32
-
true
relational_attention
true
0.72988
0.74696
0.70298
0.6757
0.59496
0.8367
Finished
awni00
47m 19s
-
swiglu
false
128
256
0.1
1task_between__sa=0; ra=2; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; symbol_type=positional_symbols
25
-
-
0
2
true
2
421730
12
mean
-
-
-
positional_symbols
10
-
128
1task_between
1375
5000
-
2
2
identity
64
-
true
relational_attention
true
0.73973
0.75693
0.72424
0.65754
0.59836
0.69096
Finished
awni00
47m 17s
-
swiglu
false
128
256
0.1
xoccurs__sa=1; ra=1; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; symbol_type=positional_symbols
25
-
-
1
2
true
2
372322
12
mean
-
-
-
positional_symbols
10
-
128
xoccurs
1375
5000
-
1
2
identity
32
-
true
relational_attention
true
0.63613
0.65895
0.59824
0.74785
0.68115
0.92407
Finished
awni00
47m 31s
-
swiglu
false
128
256
0.1
xoccurs__sa=0; ra=2; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; symbol_type=positional_symbols
25
-
-
0
2
true
2
421730
12
mean
-
-
-
positional_symbols
10
-
128
xoccurs
1375
5000
-
2
2
identity
64
-
true
relational_attention
true
0.58464
0.60078
0.5763
0.75548
0.72932
0.77086
Finished
awni00
45m 12s
-
swiglu
false
128
256
0.1
occurs__sa=1; ra=1; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; symbol_type=positional_symbols
25
-
-
1
2
true
2
372322
12
mean
-
-
-
positional_symbols
10
-
128
occurs
1375
5000
-
1
2
identity
32
-
true
relational_attention
true
0.74723
0.77157
0.66051
0.52263
0.43949
0.88065
Finished
awni00
41m 45s
-
swiglu
false
128
256
0.1
occurs__sa=0; ra=2; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; symbol_type=positional_symbols
25
-
-
0
2
true
2
421730
12
mean
-
-
-
positional_symbols
10
-
128
occurs
1375
5000
-
2
2
identity
64
-
true
relational_attention
true
0.6794
0.69489
0.64235
0.61538
0.57923
0.73344
Finished
awni00
45m 35s
-
swiglu
false
128
256
0.1
same__sa=1; ra=1; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; symbol_type=positional_symbols
25
-
-
1
2
true
2
372322
12
mean
-
-
-
positional_symbols
10
-
128
same
1375
5000
-
1
2
identity
32
-
true
relational_attention
true
0.72553
0.75089
0.68893
0.73753
0.62578
0.90267
Finished
awni00
44m 55s
-
swiglu
false
128
256
0.1
same__sa=0; ra=2; nr=2; d=128; L=2; ra_type=relational_attention; sym_rel=True; symbol_type=positional_symbols
25
-
-
0
2
true
2
421730
12
mean
-
-
-
positional_symbols
10
-
128
same
1375
5000
-
2
2
identity
64
-
true
relational_attention
true
0.76368
0.78355
0.73583
0.62555
0.54121
0.73167
1-38
of 38