Psarthi's workspace
Runs
5
Name
5 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
batch_size
checkpointer._component_
checkpointer.checkpoint_dir
checkpointer.checkpoint_files
checkpointer.model_type
checkpointer.output_dir
compile
custom_sharded_layers
dataset
device
dtype
enable_activation_checkpointing
enable_activation_offloading
epochs
gradient_accumulation_steps
log_every_n_steps
log_peak_memory_stats
loss._component_
max_steps_per_epoch
metric_logger._component_
metric_logger.log_dir
metric_logger.name
metric_logger.project
model._component_
optimizer._component_
optimizer.fused
optimizer.lr
optimizer_in_bwd
output_dir
resume_from_checkpoint
seed
shuffle
tokenizer._component_
tokenizer.max_seq_len
tokenizer.path
checkpointer.recipe_checkpoint
lr_scheduler._component_
lr_scheduler.num_warmup_steps
global_step
loss
lr
peak_memory_active
peak_memory_alloc
peak_memory_reserved
Finished
-
psarthi
5m 50s
-
4
torchtune.training.FullModelHFCheckpointer
/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_no_optim_bwd
["hf_model_0001_2.pt","hf_model_0002_2.pt","hf_model_0003_2.pt","hf_model_0004_2.pt"]
LLAMA3
/projects/data/parth/llama_3_1_lr_scheduler_test/resume_cosine_lr_warmup_50_no_optim_bwd
true
["tok_embeddings","output"]
[{"source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset","name":"fineweb-edu-dedup","split":"train"},{"name":"cosmopedia-v2","split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset"}]
cuda
bf16
false
false
3
1
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
100
torchtune.training.metric_logging.WandBLogger
/projects/data/parth/llama_3_1_lr_scheduler_test/resume_cosine_lr_warmup_50_no_optim_bwd
resume_cosine_lr_warmup_50_no_optim_bwd_4_gpus_test
torchtune_lr_scheduler_tests
torchtune.models.llama3_1.llama3_1_8b
torch.optim.AdamW
true
0.00002
false
/projects/data/parth/llama_3_1_lr_scheduler_test/resume_cosine_lr_warmup_50_no_optim_bwd
true
1234
true
torchtune.models.llama3.llama3_tokenizer
2048
/projects/data/parth/Meta-Llama-3.1-8B-Instruct/original/tokenizer.model
recipe_state.pt
torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
50
300
1.91079
0
32.05961
32.05961
75.05664
Killed
-
psarthi
3m 40s
-
4
torchtune.training.FullModelHFCheckpointer
/projects/data/parth/Meta-Llama-3.1-8B-Instruct/
["model-00001-of-00004.safetensors","model-00002-of-00004.safetensors","model-00003-of-00004.safetensors","model-00004-of-00004.safetensors"]
LLAMA3
/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_no_optim_bwd
true
["tok_embeddings","output"]
[{"_component_":"torchtune.datasets.text_completion_dataset","name":"fineweb-edu-dedup","split":"train","source":"HuggingFaceTB/smollm-corpus"},{"name":"cosmopedia-v2","split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset"}]
cuda
bf16
false
false
3
1
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
100
torchtune.training.metric_logging.WandBLogger
/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_no_optim_bwd
cosine_lr_warmup_50_no_optim_bwd_4_gpus_test
torchtune_lr_scheduler_tests
torchtune.models.llama3_1.llama3_1_8b
torch.optim.AdamW
true
0.00002
false
/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_no_optim_bwd
false
1234
true
torchtune.models.llama3.llama3_tokenizer
2048
/projects/data/parth/Meta-Llama-3.1-8B-Instruct/original/tokenizer.model
-
torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
50
101
2.32159
0.000018016
42.09233
42.09233
62.93359
Finished
-
psarthi
8m 3s
-
4
torchtune.training.FullModelHFCheckpointer
/projects/data/parth/Meta-Llama-3.1-8B-Instruct/
["model-00001-of-00004.safetensors","model-00002-of-00004.safetensors","model-00003-of-00004.safetensors","model-00004-of-00004.safetensors"]
LLAMA3
/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_optim_bwd
true
["tok_embeddings","output"]
[{"_component_":"torchtune.datasets.text_completion_dataset","name":"fineweb-edu-dedup","split":"train","source":"HuggingFaceTB/smollm-corpus"},{"name":"cosmopedia-v2","split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset"}]
cuda
bf16
false
false
3
1
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
100
torchtune.training.metric_logging.WandBLogger
/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_optim_bwd
cosine_lr_warmup_50_optim_bwd_4_gpus_test
torchtune_lr_scheduler_tests
torchtune.models.llama3_1.llama3_1_8b
torch.optim.AdamW
true
0.00002
true
/projects/data/parth/llama_3_1_lr_scheduler_test/cosine_lr_warmup_50_optim_bwd
false
1234
true
torchtune.models.llama3.llama3_tokenizer
2048
/projects/data/parth/Meta-Llama-3.1-8B-Instruct/original/tokenizer.model
-
torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
50
300
1.90786
0
32.062
32.062
58.50977
Finished
-
psarthi
8m 9s
-
4
torchtune.training.FullModelHFCheckpointer
/projects/data/parth/Meta-Llama-3.1-8B-Instruct/
["model-00001-of-00004.safetensors","model-00002-of-00004.safetensors","model-00003-of-00004.safetensors","model-00004-of-00004.safetensors"]
LLAMA3
/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_optim_bwd
true
["tok_embeddings","output"]
[{"split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset","name":"fineweb-edu-dedup"},{"split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset","name":"cosmopedia-v2"}]
cuda
bf16
false
false
3
1
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
100
torchtune.training.metric_logging.WandBLogger
/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_optim_bwd
no_lr_optim_bwd_4_gpus_test
torchtune_lr_scheduler_tests
torchtune.models.llama3_1.llama3_1_8b
torch.optim.AdamW
true
0.00002
true
/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_optim_bwd
false
1234
true
torchtune.models.llama3.llama3_tokenizer
2048
/projects/data/parth/Meta-Llama-3.1-8B-Instruct/original/tokenizer.model
-
-
-
300
1.91847
0.00002
32.062
32.062
58.50977
Finished
-
psarthi
6m 38s
-
2
torchtune.training.FullModelHFCheckpointer
/projects/data/parth/Meta-Llama-3.1-8B-Instruct/
["model-00001-of-00004.safetensors","model-00002-of-00004.safetensors","model-00003-of-00004.safetensors","model-00004-of-00004.safetensors"]
LLAMA3
/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_no_optim_bwd
true
["tok_embeddings","output"]
[{"name":"fineweb-edu-dedup","split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset"},{"name":"cosmopedia-v2","split":"train","source":"HuggingFaceTB/smollm-corpus","_component_":"torchtune.datasets.text_completion_dataset"}]
cuda
bf16
false
false
3
1
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
100
torchtune.training.metric_logging.WandBLogger
/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_no_optim_bwd
no_lr_schd_no_optim_bwd_4_gpus_test
torchtune_lr_scheduler_tests
torchtune.models.llama3_1.llama3_1_8b
torch.optim.AdamW
true
0.00002
false
/projects/data/parth/llama_3_1_lr_scheduler_test/no_lr_no_optim_bwd
false
1234
true
torchtune.models.llama3.llama3_tokenizer
2048
/projects/data/parth/Meta-Llama-3.1-8B-Instruct/original/tokenizer.model
-
-
-
300
2.44195
0.00002
27.82121
27.82121
43.7793
1-5
of 5