Pszemraj's workspace: 12 runs

Results

Run names were not preserved in the export, so runs are numbered in display order and identified by their base model (`_name_or_path`).
| #  | Model (`_name_or_path`)           | Notes                     | Tags             | Runtime       | eval/loss | train/loss | train/epoch |
|----|-----------------------------------|---------------------------|------------------|---------------|-----------|------------|-------------|
| 1  | pszemraj/stablelm-4e1t-2b-v0.1    | -                         | -                | 16h 49m 10s   | 2.0596    | 3.6148     | 0.027145    |
| 2  | pszemraj/stablelm-4e1t-2b-v0.1    | -                         | ki-tb            | 13h 13m 19s   | 1.67938   | 1.9587     | 0.52211     |
| 3  | pszemraj/stablelm-4e1t-2b-v0.1    | crashed bc runpod garbage | deepspeed, ki-tb | 6h 7m 29s     | 1.72848   | 1.796      | 0.15977     |
| 4  | pszemraj/stablelm-4e1t-2b-v0.1    | -                         | deepspeed, ki-tb | 1h 2m         | 1.72848   | 2.0844     | 0.023495    |
| 5  | pszemraj/Mistral-7B-v0.3-prune6   | -                         | ki-tb            | 19h 39s       | 1.28602   | 1.4551     | 0.4777      |
| 6  | pszemraj/Mistral-7B-v0.3-prune6   | -                         | ki-tb            | 4h 27m 33s    | 1.48288   | 1.7088     | 0.10802     |
| 7  | pszemraj/Mistral-7B-v0.3-prune10  | -                         | ki-tb            | 1d 1h 15m 52s | 1.40284   | 1.5311     | 0.84979     |
| 8  | pszemraj/Mistral-7B-v0.3-prune10  | runpod error ;(           | ki-tb            | 11h 46m 42s   | 1.50994   | 1.4816     | 0.39993     |
| 9  | pszemraj/Mistral-7B-v0.3-prune10  | different dataset         | ki-tb            | 2h 3m 20s     | 3.72229   | 1.7821     | 0.066255    |
| 10 | pszemraj/stablelm-3b-4e1t-prune10 | -                         | -                | 18h 31m 14s   | 2.47688   | 2.1213     | 1.98763     |
| 11 | pszemraj/llama-3-prune_8          | -                         | -                | 13h 12m 2s    | 2.27023   | 2.2808     | 0.99973     |
| 12 | pszemraj/llama-3-prune_12         | -                         | -                | 18h 2m 8s     | 2.45634   | 2.5359     | 0.99973     |
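The same table can be pulled programmatically through the W&B public API instead of copying it out of the UI. A minimal sketch, assuming the runs live in a project under the `pszemraj` entity; the project name is not part of this export, so `"pszemraj/<project>"` is a placeholder:

```python
import pandas as pd
import wandb

api = wandb.Api()
# Placeholder path: the export does not record the project name.
runs = api.runs("pszemraj/<project>")

rows = []
for run in runs:
    rows.append({
        "name": run.name,
        "model": run.config.get("_name_or_path"),
        "notes": run.notes,
        "tags": ", ".join(run.tags),
        "eval/loss": run.summary.get("eval/loss"),
        "train/loss": run.summary.get("train/loss"),
        "train/epoch": run.summary.get("train/epoch"),
    })

# Requires `tabulate` for to_markdown().
print(pd.DataFrame(rows).to_markdown(index=False))
```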
Per-run training hyperparameters

Only the fields that differ between runs are listed here; everything else is constant and collected under "Settings shared by all 12 runs" below.

| #  | learning_rate | gradient_accumulation_steps | optim             | gradient_checkpointing | eval_steps | num_train_epochs | num_hidden_layers |
|----|---------------|-----------------------------|-------------------|------------------------|------------|------------------|-------------------|
| 1  | 2e-5          | 32                          | paged_adamw_32bit | unsloth                | 0.2        | 1                | 22                |
| 2  | 1.5e-5        | 32                          | paged_adamw_32bit | unsloth                | 0.2        | 1                | 22                |
| 3  | 1.5e-5        | 32                          | adamw_torch       | true                   | 0.2        | 1                | 22                |
| 4  | 3e-5          | 16                          | adamw_torch       | true                   | 0.2        | 1                | 22                |
| 5  | 2e-5          | 16                          | paged_adamw_32bit | true                   | 0.2        | 1                | 26                |
| 6  | 2e-5          | 16                          | paged_adamw_32bit | true                   | 0.2        | 1                | 26                |
| 7  | 2e-5          | 16                          | adamw_torch_fused | true                   | 0.2        | 1                | 22                |
| 8  | 2e-5          | 16                          | adamw_torch_fused | true                   | 0.2        | 1                | 22                |
| 9  | 4e-5          | 16                          | adamw_torch_fused | true                   | 0.2        | 1                | 22                |
| 10 | 5e-5          | 16                          | adamw_torch_fused | true                   | 0.1        | 2                | 22                |
| 11 | 4e-5          | 16                          | adamw_torch_fused | true                   | 0.2        | 1                | 24                |
| 12 | 4e-5          | 8                           | paged_adamw_32bit | true                   | 0.2        | 1                | 20                |
Model architecture config (identical within each model family)

| Family (runs)      | model_type | architectures       | hidden_size | intermediate_size | num_key_value_heads | max_position_embeddings | eos_token_id | attention_bias | pretraining_tp |
|--------------------|------------|---------------------|-------------|-------------------|---------------------|-------------------------|--------------|----------------|----------------|
| StableLM (1-4, 10) | stablelm   | StableLmForCausalLM | 2560        | 6912              | 32                  | 4096                    | 0            | -              | -              |
| Mistral (5-9)      | mistral    | MistralForCausalLM  | 4096        | 14336             | 8                   | 32768                   | 2            | -              | -              |
| Llama 3 (11-12)    | llama      | LlamaForCausalLM    | 4096        | 14336             | 8                   | 8192                    | 128001       | false          | 1              |
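The layer counts in the hyperparameter table are consistent with the pruning named in each checkpoint: Mistral-7B-v0.3-prune6 keeps 26 of the base model's 32 layers, prune10 keeps 22, and llama-3-prune_8 and prune_12 keep 24 and 20. Since the architecture columns come straight from each checkpoint's config, they can be spot-checked against the Hub. A sketch, assuming these repos are public:

```python
from transformers import AutoConfig

# Assumes the checkpoints are publicly available on the Hugging Face Hub.
for repo in (
    "pszemraj/stablelm-4e1t-2b-v0.1",
    "pszemraj/Mistral-7B-v0.3-prune6",
    "pszemraj/llama-3-prune_8",
):
    cfg = AutoConfig.from_pretrained(repo)
    print(
        repo, cfg.model_type, cfg.hidden_size, cfg.num_hidden_layers,
        cfg.num_key_value_heads, cfg.max_position_embeddings,
    )
```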
Settings shared by all 12 runs

- Adam: adam_beta1 0.9, adam_beta2 0.999, adam_epsilon 1e-8
- Schedule and clipping: lr_scheduler_type cosine, max_grad_norm 1
- Precision: bf16 true, bf16_full_eval false, fp16 false, half_precision_backend auto
- Batching: per_device_train_batch_size 1, per_device_eval_batch_size 1, accelerator_config.even_batches true
- Attention/MLP: num_attention_heads 32, hidden_act silu, add_cross_attention false, chunk_size_feed_forward 0
- Trainer flags: fsdp [] (unused), prediction_loss_only false, pretraining false, remove_unused_columns true, remove_invalid_values false, past_index -1, ray_scope last
- Hub: push_to_hub true, push_to_hub_token <PUSH_TO_HUB_TOKEN> (redacted)
- Misc: qlora false, relora_prune_ratio 0.9, repetition_penalty 1
- Sweep: none (no run belongs to a sweep)
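The non-architecture columns map onto Hugging Face TrainingArguments. A minimal sketch of the shared setup, filling in run 7's values for the per-run fields; output_dir is a placeholder not present in the export, and the "unsloth" gradient-checkpointing value logged by runs 1-2 is a non-standard variant approximated here with the plain boolean:

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="./out",               # placeholder: not recorded in the export
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,   # run 7; 8-32 in other runs
    learning_rate=2e-5,               # run 7; 1.5e-5 to 5e-5 in other runs
    lr_scheduler_type="cosine",
    num_train_epochs=1,               # 2 for run 10
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    max_grad_norm=1.0,
    bf16=True,
    gradient_checkpointing=True,      # runs 1-2 log the non-standard "unsloth" mode
    optim="adamw_torch_fused",        # run 7; paged_adamw_32bit / adamw_torch elsewhere
    eval_strategy="steps",
    eval_steps=0.2,                   # a value < 1 is a fraction of total steps; 0.1 for run 10
    push_to_hub=True,
)
```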