Bradhilton's workspace
Runs
46
State
Notes
User
Tags
Created
Runtime
Sweep
betas
clip_epsilon
clip_grad_norm
entropy_coef
kl_coef
lr
model
off_policy
samples_per_task
seq_len
stride
tanh
task
tasks_per_iter
weight_decay
acc
early_stop
entropy
exceptions
grad_magnitude
iteration
kl_div
loss
policy
reward
surprise
tokens
tokens_per_second_per_gpu
Crashed
-
bradhilton
3h 2m 30s
-
[0.9,0.99]
0.2
-
0
0
0.000006
Qwen/Qwen2.5-14B-Instruct
true
50
16384
32
false
temporal-clue
32
0.1
0.37335
0.015625
0.85798
[]
-
8
NaN
369912825.1074
369912825.1074
0.37335
-
782
961.8544
Crashed
-
bradhilton
44m
-
[0.9,0.99]
0.02
-
0
0
0.000006
Qwen/Qwen2.5-14B-Instruct
true
50
16384
32
false
temporal-clue
32
0.1
0.28286
0.13281
0.99506
[]
-
1
NaN
752483.10764
752483.10764
0.28286
-
883
951.25705
Crashed
-
bradhilton
36m 45s
-
[0.9,0.99]
0.02
-
0
0
0.000006
Qwen/Qwen2.5-14B-Instruct
true
50
16384
32
false
temporal-clue
32
0.1
0.25756
0.17188
-
[]
-
0
-
-
-
0.25756
-
858
-
Crashed
-
bradhilton
32m
-
[0.9,0.99]
0.2
-
0
0
0.000006
Qwen/Qwen2.5-14B-Instruct
true
50
16384
32
false
temporal-clue
32
0.1
0.25669
0.16406
-
[]
-
0
-
-
-
0.25669
-
845
-
Crashed
-
bradhilton
1h 26m 31s
-
[0.9,0.99]
0.2
-
0
0
0.000006
NousResearch/Hermes-2-Theta-Llama-3-8B
-
50
16384
32
false
temporal-clue
32
0.1
0.29377
0.0078125
0.085809
[]
-
11
NaN
0.019488
0.019488
0.29377
-
95
1304.30857
Crashed
-
bradhilton
4h 17m 17s
-
[0.9,0.99]
0.2
0.2
0
0
0.000006
Qwen/Qwen2.5-32B-Instruct
-
50
16384
32
false
temporal-clue
32
0.1
0.61195
-
0.0071465
[]
-
181
NaN
0.0028111
0.0028754
0.35436
-
982
584.70489
Crashed
-
bradhilton
9h 30m 17s
-
[0.9,0.99]
0.2
0.3
0
0
0.000008
Qwen/Qwen2.5-14B-Instruct
-
50
16384
32
false
temporal-clue
32
0.1
0.54691
-
-
[]
-
110
-
-
-
0.54691
-
1874
-
Crashed
-
bradhilton
6h 15m 54s
-
[0.9,0.99]
0.2
0.5
0
0
0.000006
Qwen/Qwen2.5-14B-Instruct
-
50
16384
32
false
temporal-clue
32
0.1
0.43597
0.0078125
-
[]
-
18
-
-
-
0.43597
-
808
-
Crashed
-
bradhilton
10h 50m 25s
-
[0.9,0.99]
0.2
-
0
0
0.000006
Qwen/Qwen2.5-14B-Instruct
-
100
16384
16
false
temporal-clue
16
0.1
0.4377
0.0078125
0.30998
[]
-
29
NaN
0.00047522
0.00047522
0.4377
-
1118
956.00731
Crashed
-
bradhilton
13h 49m 29s
-
[0.9,0.99]
0.2
-
0
0
0.000006
Qwen/Qwen2.5-14B-Instruct
-
50
16384
32
false
temporal-clue
32
0.1
0.51184
0.015625
0.29252
[]
-
39
NaN
0.0023981
0.0023981
0.32125
-
1118
952.55334
Crashed
-
bradhilton
5d 21h 34m 33s
-
[0.9,0.99]
0.2
0.3
0
0
0.000006
Qwen/Qwen2.5-32B-Instruct
-
50
16384
32
false
temporal-clue
32
0.1
0.61414
0.0078125
0.013348
[]
-
223
NaN
0.0042403
0.0042403
0.61414
-
47
433.90702
Finished
-
bradhilton
3h 39m 39s
-
[0.9,0.99]
0.2
-
0
0
0.000006
Qwen/Qwen2.5-14B-Instruct
-
100
16384
16
false
temporal-clue
16
0.1
0.38074
0.0078125
0.60102
[]
-
10
NaN
-0.00026949
-0.00026949
0.38074
-
915
983.08311
Finished
-
bradhilton
3h 54m 46s
-
[0.9,0.99]
0.2
-
0
0
0.000006
Qwen/Qwen2.5-14B-Instruct
-
32
16384
64
false
temporal-clue
64
0.1
0.39234
0.0078125
0.66562
[]
-
9
NaN
0.0030049
0.0030049
0.39234
-
888
963.30528
Finished
-
bradhilton
2h 2m 49s
-
[0.9,0.99]
0.2
-
0
0
0.000006
Qwen/Qwen2.5-14B-Instruct
-
50
16384
32
false
temporal-clue
32
0.1
0.27973
0.054688
-
[]
-
5
-
-
-
0.27973
-
871
-
Crashed
-
bradhilton
6d 12h 47m 48s
-
[0.9,0.99]
0.2
0
0
0
0.000006
Qwen/Qwen2.5-72B-Instruct
-
50
16384
16
false
temporal-clue
16
0.1
0.49565
-
0.041797
[]
-
56
NaN
-0.0065436
-0.0065436
0.49565
-
2285
265.75406
Finished
-
bradhilton
2h 51m 1s
-
[0.9,0.99]
0.2
-
0
0
0.000006
NousResearch/Hermes-2-Theta-Llama-3-8B
-
64
16384
32
false
temporal-clue
64
0.1
0.34803
-
0.18374
[]
-
17
NaN
0.014124
0.014124
0.34803
-
34
816.07202
Crashed
-
bradhilton
56m 15s
-
[0.9,0.99]
0.2
-
0
0
0.00001
NousResearch/Hermes-2-Theta-Llama-3-8B
-
50
16384
32
false
temporal-clue
64
0.1
0.33004
-
0.57827
[]
4.4375
1
NaN
0.03402
0.03402
0.33004
-
36
1095.98706
Crashed
-
bradhilton
37m 51s
-
[0.9,0.99]
0.2
-
0
0
0.00001
NousResearch/Hermes-2-Theta-Llama-3-8B
-
50
16384
32
false
temporal-clue
64
0.1
0.32145
-
0.57961
[]
4.25
1
NaN
0.03033
0.03033
0.32145
-
36
1101.77283
Crashed
-
bradhilton
40m
-
[0.9,0.99]
0.2
-
0
0
0.00001
NousResearch/Hermes-2-Theta-Llama-3-8B
-
50
16384
32
false
temporal-clue
64
0.1
0.23506
-
0.016354
[]
836
3
NaN
-0.031377
-0.031377
0.23506
-
45
637.84967
Finished
-
bradhilton
1d 1h 52m 6s
-
[0.9,0.99]
0.2
-
0
0
0.000006
NousResearch/Hermes-2-Theta-Llama-3-8B
-
50
16384
32
false
temporal-clue
64
0.1
0.095396
-
0.30002
[]
276
6
NaN
0.222
0.222
0.095396
-
35
522.50232
1-20
of 46