jcummings's workspace: Runs (2)
| Field | Run 1 | Run 2 |
|---|---|---|
| State | Killed | Killed |
| Notes | | |
| User | jcummings | jcummings |
| Tags | | |
| Created | | |
| Runtime | 2m 54s | 2m 13s |
| Sweep | - | - |
| batch_size | 2 | 2 |
| checkpointer._component_ | torchtune.training.FullModelHFCheckpointer | torchtune.training.FullModelHFCheckpointer |
| checkpointer.checkpoint_dir | /tmp/Meta-Llama-3.1-8B-Instruct/ | /tmp/Meta-Llama-3.1-8B-Instruct/ |
| checkpointer.checkpoint_files | ["model-00001-of-00004.safetensors","model-00002-of-00004.safetensors","model-00003-of-00004.safetensors","model-00004-of-00004.safetensors"] | ["model-00001-of-00004.safetensors","model-00002-of-00004.safetensors","model-00003-of-00004.safetensors","model-00004-of-00004.safetensors"] |
| checkpointer.model_type | LLAMA3 | LLAMA3 |
| checkpointer.output_dir | /tmp/Meta-Llama-3.1-8B-Instruct/ | /tmp/Meta-Llama-3.1-8B-Instruct/ |
| compile | false | false |
| custom_sharded_layers | - | ["tok_embeddings","output"] |
| dataset._component_ | torchtune.datasets.stack_exchange_paired_dataset | torchtune.datasets.stack_exchange_paired_dataset |
| dataset.packed | - | - |
| device | cuda | cuda |
| dtype | bf16 | bf16 |
| enable_activation_checkpointing | true | true |
| enable_activation_offloading | false | false |
| epochs | 1 | 1 |
| gradient_accumulation_steps | 8 | 8 |
| log_every_n_steps | 1 | 1 |
| log_peak_memory_stats | true | true |
| loss._component_ | torchtune.rlhf.loss.DPOLoss | torchtune.rlhf.loss.DPOLoss |
| loss.beta | 0.1 | 0.1 |
| loss.label_smoothing | 0 | 0 |
| lr_scheduler._component_ | torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup | torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup |
| lr_scheduler.num_warmup_steps | 100 | 100 |
| metric_logger._component_ | torchtune.training.metric_logging.WandBLogger | torchtune.training.metric_logging.WandBLogger |
| metric_logger.project | test-123-dpo | test-123-dpo |
| model._component_ | torchtune.models.llama3_1.lora_llama3_1_8b | torchtune.models.llama3_1.lora_llama3_1_8b |
| model.apply_lora_to_mlp | true | true |
| model.apply_lora_to_output | false | false |
| model.lora_alpha | 16 | 16 |
| model.lora_attn_modules | ["q_proj","v_proj","output_proj"] | ["q_proj","v_proj","output_proj"] |
| model.lora_dropout | 0 | 0 |
| model.lora_rank | 8 | 8 |
| optimizer._component_ | torch.optim.AdamW | torch.optim.AdamW |
| optimizer.fused | true | true |
| optimizer.lr | 0.0005 | 0.0005 |
| optimizer.weight_decay | 0.05 | 0.05 |
| output_dir | /tmp/lora_finetune_output | /tmp/lora_finetune_output |
| profiler._component_ | torchtune.training.setup_torch_profiler | torchtune.training.setup_torch_profiler |
| profiler.active_steps | 2 | 2 |
| profiler.cpu | true | true |
| profiler.cuda | true | true |
| profiler.enabled | false | false |
| profiler.num_cycles | 1 | 1 |
| profiler.output_dir | /tmp/lora_finetune_output/profiling_outputs | /tmp/lora_finetune_output/profiling_outputs |
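Both runs are killed torchtune LoRA DPO fine-tunes of Meta-Llama-3.1-8B-Instruct with identical configurations except for `custom_sharded_layers` (unset in Run 1, `["tok_embeddings","output"]` in Run 2). Below is a minimal sketch of how such a comparison could be pulled programmatically with the W&B public API; the entity name `jcummings` is an assumption based on the workspace owner, and the project name comes from `metric_logger.project` above.

```python
# Sketch (not verified against this workspace): diff the two runs' configs via
# the W&B public API. Entity "jcummings" is assumed; the project name is taken
# from metric_logger.project in the table above.
import wandb

api = wandb.Api()
runs = list(api.runs("jcummings/test-123-dpo"))
assert len(runs) >= 2, "expected the two killed DPO runs shown above"

run_a, run_b = runs[0], runs[1]
print(run_a.name, run_a.state, "|", run_b.name, run_b.state)

# Report every top-level config key whose value differs between the two runs.
# Note: torchtune's WandBLogger may log nested dicts (e.g. config["model"]),
# which the W&B table UI flattens into dotted column names.
for key in sorted(set(run_a.config) | set(run_b.config)):
    a_val, b_val = run_a.config.get(key), run_b.config.get(key)
    if a_val != b_val:
        print(f"{key}: {a_val!r} -> {b_val!r}")
# For the runs above, the only config difference should be custom_sharded_layers.
```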