torchtune_lr_scheduler_tests Table

Psarthi's workspace

Runs

global_step

loss

peak_memory_active

peak_memory_alloc

peak_memory_reserved

Finished

psarthi

1y ago

5m 50s

torchtune.training.FullModelHFCheckpointer

/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_no_optim_bwd

["hf_model_0001_2.pt","hf_model_0002_2.pt","hf_model_0003_2.pt","hf_model_0004_2.pt"]

LLAMA3

/projects/data/parth/llama_3_1_lr_scheduler_test/resume_cosine_lr_warmup_50_no_optim_bwd

true

["tok_embeddings","output"]

[{"name":"fineweb-edu-dedup","split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset"},{"name":"cosmopedia-v2","split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset"}]

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

100

torchtune.training.metric_logging.WandBLogger

/projects/data/parth/llama_3_1_lr_scheduler_test/resume_cosine_lr_warmup_50_no_optim_bwd

resume_cosine_lr_warmup_50_no_optim_bwd_4_gpus_test

torchtune_lr_scheduler_tests

torchtune.models.llama3_1.llama3_1_8b

torch.optim.AdamW

true

0.00002

false

/projects/data/parth/llama_3_1_lr_scheduler_test/resume_cosine_lr_warmup_50_no_optim_bwd

true

1234

true

torchtune.models.llama3.llama3_tokenizer

2048

/projects/data/parth/Meta-Llama-3.1-8B-Instruct/original/tokenizer.model

recipe_state.pt

torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup

300

1.91079

32.05961

75.05664

Killed

psarthi

1y ago

3m 40s

torchtune.training.FullModelHFCheckpointer

/projects/data/parth/Meta-Llama-3.1-8B-Instruct/

["model-00001-of-00004.safetensors","model-00002-of-00004.safetensors","model-00003-of-00004.safetensors","model-00004-of-00004.safetensors"]

LLAMA3

/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_no_optim_bwd

true

["tok_embeddings","output"]

[{"split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset","name":"fineweb-edu-dedup"},{"name":"cosmopedia-v2","split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset"}]

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

100

torchtune.training.metric_logging.WandBLogger

/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_no_optim_bwd

cosine_lr_warmup_50_no_optim_bwd_4_gpus_test

torchtune_lr_scheduler_tests

torchtune.models.llama3_1.llama3_1_8b

torch.optim.AdamW

true

0.00002

false

/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_no_optim_bwd

false

1234

true

torchtune.models.llama3.llama3_tokenizer

2048

/projects/data/parth/Meta-Llama-3.1-8B-Instruct/original/tokenizer.model

torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup

101

2.32159

0.000018016

42.09233

62.93359

Finished

psarthi

1y ago

8m 3s

torchtune.training.FullModelHFCheckpointer

/projects/data/parth/Meta-Llama-3.1-8B-Instruct/

["model-00001-of-00004.safetensors","model-00002-of-00004.safetensors","model-00003-of-00004.safetensors","model-00004-of-00004.safetensors"]

LLAMA3

/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_optim_bwd

true

["tok_embeddings","output"]

[{"_component_":"torchtune.datasets.text_completion_dataset","name":"fineweb-edu-dedup","split":"train","source":"HuggingFaceTB/smollm-corpus"},{"split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset","name":"cosmopedia-v2"}]

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

100

torchtune.training.metric_logging.WandBLogger

/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_optim_bwd

cosine_lr_warmup_50_optim_bwd_4_gpus_test

torchtune_lr_scheduler_tests

torchtune.models.llama3_1.llama3_1_8b

torch.optim.AdamW

true

0.00002

true

/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_optim_bwd

false

1234

true

torchtune.models.llama3.llama3_tokenizer

2048

/projects/data/parth/Meta-Llama-3.1-8B-Instruct/original/tokenizer.model

torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup

300

1.90786

32.062

58.50977

Finished

psarthi

1y ago

8m 9s

torchtune.training.FullModelHFCheckpointer

/projects/data/parth/Meta-Llama-3.1-8B-Instruct/

["model-00001-of-00004.safetensors","model-00002-of-00004.safetensors","model-00003-of-00004.safetensors","model-00004-of-00004.safetensors"]

LLAMA3

/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_optim_bwd

true

["tok_embeddings","output"]

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

100

torchtune.training.metric_logging.WandBLogger

/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_optim_bwd

no_lr_optim_bwd_4_gpus_test

torchtune_lr_scheduler_tests

torchtune.models.llama3_1.llama3_1_8b

torch.optim.AdamW

true

0.00002

true

/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_optim_bwd

false

1234

true

torchtune.models.llama3.llama3_tokenizer

2048

/projects/data/parth/Meta-Llama-3.1-8B-Instruct/original/tokenizer.model

300

1.91847

0.00002

32.062

58.50977

Finished

psarthi

1y ago

6m 38s

torchtune.training.FullModelHFCheckpointer

/projects/data/parth/Meta-Llama-3.1-8B-Instruct/

["model-00001-of-00004.safetensors","model-00002-of-00004.safetensors","model-00003-of-00004.safetensors","model-00004-of-00004.safetensors"]

LLAMA3

/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_no_optim_bwd

true

["tok_embeddings","output"]

[{"_component_":"torchtune.datasets.text_completion_dataset","name":"fineweb-edu-dedup","split":"train","source":"HuggingFaceTB/smollm-corpus"},{"_component_":"torchtune.datasets.text_completion_dataset","name":"cosmopedia-v2","split":"train","source":"HuggingFaceTB/smollm-corpus"}]

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

100

torchtune.training.metric_logging.WandBLogger

/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_no_optim_bwd

no_lr_schd_no_optim_bwd_4_gpus_test

torchtune_lr_scheduler_tests

torchtune.models.llama3_1.llama3_1_8b

torch.optim.AdamW

true

0.00002

false

/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_no_optim_bwd

false

1234

true

torchtune.models.llama3.llama3_tokenizer

2048

/projects/data/parth/Meta-Llama-3.1-8B-Instruct/original/tokenizer.model

300

2.44195

0.00002

27.82121

43.7793

1-5

of 5