Jcummings's workspace
Runs
2
State
Notes
User
Tags
Created
Runtime
Sweep
batch_size
checkpointer._component_
checkpointer.checkpoint_dir
checkpointer.checkpoint_files
checkpointer.model_type
checkpointer.output_dir
compile
custom_sharded_layers
dataset._component_
dataset.packed
device
dtype
enable_activation_checkpointing
enable_activation_offloading
epochs
gradient_accumulation_steps
log_every_n_steps
log_peak_memory_stats
loss._component_
lr_scheduler._component_
lr_scheduler.num_warmup_steps
metric_logger._component_
metric_logger.project
model._component_
model.apply_lora_to_mlp
model.apply_lora_to_output
model.lora_alpha
model.lora_attn_modules
model.lora_dropout
model.lora_rank
optimizer._component_
optimizer.fused
optimizer.lr
optimizer.weight_decay
output_dir
profiler._component_
profiler.active_steps
profiler.cpu
profiler.cuda
profiler.enabled
profiler.num_cycles
profiler.output_dir
profiler.profile_memory
profiler.record_shapes
Killed
jcummings
2m 8s
-
2
torchtune.training.FullModelHFCheckpointer
/tmp/Meta-Llama-3-70B-Instruct
["model-00001-of-00030.safetensors","model-00002-of-00030.safetensors","model-00003-of-00030.safetensors","model-00004-of-00030.safetensors","model-00005-of-00030.safetensors","model-00006-of-00030.safetensors","model-00007-of-00030.safetensors","model-00008-of-00030.safetensors","model-00009-of-00030.safetensors","model-00010-of-00030.safetensors","model-00011-of-00030.safetensors","model-00012-of-00030.safetensors","model-00013-of-00030.safetensors","model-00014-of-00030.safetensors","model-00015-of-00030.safetensors","model-00016-of-00030.safetensors","model-00017-of-00030.safetensors","model-00018-of-00030.safetensors","model-00019-of-00030.safetensors","model-00020-of-00030.safetensors","model-00021-of-00030.safetensors","model-00022-of-00030.safetensors","model-00023-of-00030.safetensors","model-00024-of-00030.safetensors","model-00025-of-00030.safetensors","model-00026-of-00030.safetensors","model-00027-of-00030.safetensors","model-00028-of-00030.safetensors","model-00029-of-00030.safetensors","model-00030-of-00030.safetensors"]
LLAMA3
/tmp/Meta-Llama-3-70B-Instruct
false
-
torchtune.datasets.alpaca_dataset
false
cuda
bf16
true
false
1
1
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
100
torchtune.training.metric_logging.WandBLogger
llama3-sharded
torchtune.models.llama3.lora_llama3_70b
true
false
32
["q_proj","v_proj","output_proj"]
0
16
torch.optim.AdamW
true
0.0003
0.01
/tmp/lora_finetune_output
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/lora_finetune_output/profiling_outputs
false
true
Crashed
jcummings
3m
-
2
torchtune.training.FullModelHFCheckpointer
/tmp/Meta-Llama-3-70B-Instruct
["model-00001-of-00030.safetensors","model-00002-of-00030.safetensors","model-00003-of-00030.safetensors","model-00004-of-00030.safetensors","model-00005-of-00030.safetensors","model-00006-of-00030.safetensors","model-00007-of-00030.safetensors","model-00008-of-00030.safetensors","model-00009-of-00030.safetensors","model-00010-of-00030.safetensors","model-00011-of-00030.safetensors","model-00012-of-00030.safetensors","model-00013-of-00030.safetensors","model-00014-of-00030.safetensors","model-00015-of-00030.safetensors","model-00016-of-00030.safetensors","model-00017-of-00030.safetensors","model-00018-of-00030.safetensors","model-00019-of-00030.safetensors","model-00020-of-00030.safetensors","model-00021-of-00030.safetensors","model-00022-of-00030.safetensors","model-00023-of-00030.safetensors","model-00024-of-00030.safetensors","model-00025-of-00030.safetensors","model-00026-of-00030.safetensors","model-00027-of-00030.safetensors","model-00028-of-00030.safetensors","model-00029-of-00030.safetensors","model-00030-of-00030.safetensors"]
LLAMA3
/tmp/Meta-Llama-3-70B-Instruct
false
["tok_embeddings","output"]
torchtune.datasets.alpaca_dataset
false
cuda
bf16
true
false
1
1
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
100
torchtune.training.metric_logging.WandBLogger
llama3-sharded
torchtune.models.llama3.lora_llama3_70b
true
false
32
["q_proj","v_proj","output_proj"]
0
16
torch.optim.AdamW
true
0.0003
0.01
/tmp/lora_finetune_output
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/lora_finetune_output/profiling_outputs
false
true
1-2
of 2