rl-experiments Table – Weights & Biases

Skip to main content

Bradhilton's workspace

Runs

46

acc

early_stop

entropy

exceptions

grad_magnitude

iteration

kl_div

loss

policy

reward

surprise

tokens

tokens_per_second_per_gpu

Crashed

-

bradhilton

6mo ago

3h 2m 30s

-

[0.9,0.99]

0.2

-

0

0

0.000006

Qwen/Qwen2.5-14B-Instruct

true

50

16384

32

false

temporal-clue

32

0.1

0.37335

0.015625

0.85798

[]

-

8

NaN

369912825.1074

369912825.1074

0.37335

-

782

961.8544

Crashed

-

bradhilton

6mo ago

44m

-

[0.9,0.99]

0.02

-

0

0

0.000006

Qwen/Qwen2.5-14B-Instruct

true

50

16384

32

false

temporal-clue

32

0.1

0.28286

0.13281

0.99506

[]

-

1

NaN

752483.10764

752483.10764

0.28286

-

883

951.25705

Crashed

-

bradhilton

6mo ago

36m 45s

-

[0.9,0.99]

0.02

-

0

0

0.000006

Qwen/Qwen2.5-14B-Instruct

true

50

16384

32

false

temporal-clue

32

0.1

0.25756

0.17188

-

[]

-

0

-

-

-

0.25756

-

858

-

Crashed

-

bradhilton

6mo ago

32m

-

[0.9,0.99]

0.2

-

0

0

0.000006

Qwen/Qwen2.5-14B-Instruct

true

50

16384

32

false

temporal-clue

32

0.1

0.25669

0.16406

-

[]

-

0

-

-

-

0.25669

-

845

-

Crashed

-

bradhilton

6mo ago

1h 26m 31s

-

[0.9,0.99]

0.2

-

0

0

0.000006

NousResearch/Hermes-2-Theta-Llama-3-8B

-

50

16384

32

false

temporal-clue

32

0.1

0.29377

0.0078125

0.085809

[]

-

11

NaN

0.019488

0.019488

0.29377

-

95

1304.30857

Crashed

-

bradhilton

6mo ago

4h 17m 17s

-

[0.9,0.99]

0.2

0.2

0

0

0.000006

Qwen/Qwen2.5-32B-Instruct

-

50

16384

32

false

temporal-clue

32

0.1

0.61195

-

0.0071465

[]

-

181

NaN

0.0028111

0.0028754

0.35436

-

982

584.70489

Crashed

-

bradhilton

7mo ago

9h 30m 17s

-

[0.9,0.99]

0.2

0.3

0

0

0.000008

Qwen/Qwen2.5-14B-Instruct

-

50

16384

32

false

temporal-clue

32

0.1

0.54691

-

-

[]

-

110

-

-

-

0.54691

-

1874

-

Crashed

-

bradhilton

7mo ago

6h 15m 54s

-

[0.9,0.99]

0.2

0.5

0

0

0.000006

Qwen/Qwen2.5-14B-Instruct

-

50

16384

32

false

temporal-clue

32

0.1

0.43597

0.0078125

-

[]

-

18

-

-

-

0.43597

-

808

-

Crashed

-

bradhilton

7mo ago

10h 50m 25s

-

[0.9,0.99]

0.2

-

0

0

0.000006

Qwen/Qwen2.5-14B-Instruct

-

100

16384

16

false

temporal-clue

16

0.1

0.4377

0.0078125

0.30998

[]

-

29

NaN

0.00047522

0.00047522

0.4377

-

1118

956.00731

Crashed

-

bradhilton

7mo ago

13h 49m 29s

-

[0.9,0.99]

0.2

-

0

0

0.000006

Qwen/Qwen2.5-14B-Instruct

-

50

16384

32

false

temporal-clue

32

0.1

0.51184

0.015625

0.29252

[]

-

39

NaN

0.0023981

0.0023981

0.32125

-

1118

952.55334

Crashed

-

bradhilton

7mo ago

5d 21h 34m 33s

-

[0.9,0.99]

0.2

0.3

0

0

0.000006

Qwen/Qwen2.5-32B-Instruct

-

50

16384

32

false

temporal-clue

32

0.1

0.61414

0.0078125

0.013348

[]

-

223

NaN

0.0042403

0.0042403

0.61414

-

47

433.90702

Finished

-

bradhilton

7mo ago

3h 39m 39s

-

[0.9,0.99]

0.2

-

0

0

0.000006

Qwen/Qwen2.5-14B-Instruct

-

100

16384

16

false

temporal-clue

16

0.1

0.38074

0.0078125

0.60102

[]

-

10

NaN

-0.00026949

-0.00026949

0.38074

-

915

983.08311

Finished

-

bradhilton

7mo ago

3h 54m 46s

-

[0.9,0.99]

0.2

-

0

0

0.000006

Qwen/Qwen2.5-14B-Instruct

-

32

16384

64

false

temporal-clue

64

0.1

0.39234

0.0078125

0.66562

[]

-

9

NaN

0.0030049

0.0030049

0.39234

-

888

963.30528

Finished

-

bradhilton

7mo ago

2h 2m 49s

-

[0.9,0.99]

0.2

-

0

0

0.000006

Qwen/Qwen2.5-14B-Instruct

-

50

16384

32

false

temporal-clue

32

0.1

0.27973

0.054688

-

[]

-

5

-

-

-

0.27973

-

871

-

Crashed

-

bradhilton

7mo ago

6d 12h 47m 48s

-

[0.9,0.99]

0.2

0

0

0

0.000006

Qwen/Qwen2.5-72B-Instruct

-

50

16384

16

false

temporal-clue

16

0.1

0.49565

-

0.041797

[]

-

56

NaN

-0.0065436

-0.0065436

0.49565

-

2285

265.75406

Finished

-

bradhilton

7mo ago

2h 51m 1s

-

[0.9,0.99]

0.2

-

0

0

0.000006

NousResearch/Hermes-2-Theta-Llama-3-8B

-

64

16384

32

false

temporal-clue

64

0.1

0.34803

-

0.18374

[]

-

17

NaN

0.014124

0.014124

0.34803

-

34

816.07202

Crashed

-

bradhilton

7mo ago

56m 15s

-

[0.9,0.99]

0.2

-

0

0

0.00001

NousResearch/Hermes-2-Theta-Llama-3-8B

-

50

16384

32

false

temporal-clue

64

0.1

0.33004

-

0.57827

[]

4.4375

1

NaN

0.03402

0.03402

0.33004

-

36

1095.98706

Crashed

-

bradhilton

7mo ago

37m 51s

-

[0.9,0.99]

0.2

-

0

0

0.00001

NousResearch/Hermes-2-Theta-Llama-3-8B

-

50

16384

32

false

temporal-clue

64

0.1

0.32145

-

0.57961

[]

4.25

1

NaN

0.03033

0.03033

0.32145

-

36

1101.77283

Crashed

-

bradhilton

7mo ago

40m

-

[0.9,0.99]

0.2

-

0

0

0.00001

NousResearch/Hermes-2-Theta-Llama-3-8B

-

50

16384

32

false

temporal-clue

64

0.1

0.23506

-

0.016354

[]

836

3

NaN

-0.031377

-0.031377

0.23506

-

45

637.84967

Finished

-

bradhilton

7mo ago

1d 1h 52m 6s

-

[0.9,0.99]

0.2

-

0

0

0.000006

NousResearch/Hermes-2-Theta-Llama-3-8B

-

50

16384

32

false

temporal-clue

64

0.1

0.095396

-

0.30002

[]

276

6

NaN

0.222

0.222

0.095396

-

35

522.50232

1-20

of 46