Ebs's workspace
Runs: 4
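Four torchtune full fine-tuning runs of Llama-3-8B on a single CUDA device, comparing batch_size=4 / gradient_accumulation_steps=1 against batch_size=1 / gradient_accumulation_steps=4, each under a "baseline" and a "new" setup (the pairing is inferred from the run names; the logged configs within each pair are identical).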
All four runs Finished (user: ebs). The Notes and Sweep columns were "-" for every run; Tags and Created carried no values in this export. Only batch_size, gradient_accumulation_steps, and the W&B run name vary across runs; the rest of the configuration is listed after the table.

| Run (metric_logger.name) | Runtime | batch_size | gradient_accumulation_steps | global_step | loss | lr | peak_memory_active | peak_memory_alloc | peak_memory_reserved | tokens_per_second_per_gpu |
|---|---|---|---|---|---|---|---|---|---|---|
| new-bsz4-ga1 | 2m 2s | 4 | 1 | 100 | 1.16348 | 0.00001 | 30.29896 | 30.29896 | 31.69336 | 910.98041 |
| baseline-bsz1-ga4 | 3m | 1 | 4 | 100 | 1.17437 | 0.00001 | 33.33807 | 33.33807 | 34.41992 | 74.47246 |
| new-bsz1-ga4 | 3m 4s | 1 | 4 | 100 | 1.16044 | 0.00001 | 31.64735 | 31.64735 | 32.63086 | 499.9202 |
| baseline-bsz4-ga1 | 2m 19s | 4 | 1 | 100 | 1.16454 | 0.00001 | 30.29896 | 30.29896 | 31.69336 | 914.93134 |
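Both batch settings yield the same effective batch size per optimizer step (batch_size × gradient_accumulation_steps = 4 × 1 = 1 × 4 = 4), consistent with the final losses all landing between 1.16 and 1.17. The runs differ in throughput and memory instead: the bsz4/ga1 runs sustain roughly 911-915 tokens/s/GPU against 74.47 for baseline-bsz1-ga4 and 499.92 for new-bsz1-ga4, and their peak memory (torchtune logs these values in GB) is about 2-4 GB lower. Since the logged configs within each baseline/new pair are identical, the 74.47 → 499.92 jump presumably comes from a change outside the config, as the before/after run names suggest.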
Shared configuration (identical across all four runs):

checkpointer._component_: torchtune.training.FullModelMetaCheckpointer
checkpointer.checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpointer.checkpoint_files: ["consolidated.00.pth"]
checkpointer.model_type: LLAMA3
checkpointer.output_dir: /tmp/Meta-Llama-3-8B-Instruct/
compile: false
dataset._component_: torchtune.datasets.alpaca_dataset
device: cuda
dtype: bf16
enable_activation_checkpointing: true
epochs: 1
log_every_n_steps: 1
log_peak_memory_stats: true
loss._component_: torchtune.modules.loss.CEWithChunkedOutputLoss
lr_scheduler._component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
lr_scheduler.num_warmup_steps: 100
max_steps_per_epoch: 100
metric_logger._component_: torchtune.training.metric_logging.WandBLogger
metric_logger.log_dir: /tmp/full-llama3-finetune
metric_logger.project: llama3-8b-full-single
model._component_: torchtune.models.llama3.llama3_8b
optimizer._component_: bitsandbytes.optim.PagedAdamW8bit
optimizer.lr: 0.00001
optimizer_in_bwd: false
output_dir: /tmp/full-llama3-finetune
resume_from_checkpoint: false
shuffle: true
tokenizer._component_: torchtune.models.llama3.llama3_tokenizer
tokenizer.path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
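The dotted keys are the flattened view of torchtune's nested YAML recipe config. As a minimal sketch of how the nested sections would look in the config file itself (nesting inferred from the dotted names, values taken verbatim from the export; recipe defaults that don't appear here, such as seed, are omitted):

```yaml
# Nested sections reconstructed from the flattened export keys.
checkpointer:
  _component_: torchtune.training.FullModelMetaCheckpointer
  checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
  checkpoint_files: ["consolidated.00.pth"]
  model_type: LLAMA3
  output_dir: /tmp/Meta-Llama-3-8B-Instruct/

optimizer:
  _component_: bitsandbytes.optim.PagedAdamW8bit
  lr: 0.00001

metric_logger:
  _component_: torchtune.training.metric_logging.WandBLogger
  project: llama3-8b-full-single
  log_dir: /tmp/full-llama3-finetune
  name: new-bsz4-ga1            # differs per run; see the table above

# The two fields swept across runs stay top-level:
batch_size: 4                   # 4 for the *-bsz4-ga1 runs, 1 for *-bsz1-ga4
gradient_accumulation_steps: 1  # 1 for the *-bsz4-ga1 runs, 4 for *-bsz1-ga4
```

The model, tokenizer, dataset, loss, and lr_scheduler keys nest the same way. A config like this is normally launched through torchtune's CLI, e.g. `tune run full_finetune_single_device --config <file>`, with per-run fields passed as `key=value` overrides (recipe name assumed from the single-device, full-finetune settings shown here).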