Ebs's workspace
Runs
23
Name
2 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
batch_size
checkpointer._component_
checkpointer.checkpoint_dir
checkpointer.checkpoint_files
checkpointer.model_type
checkpointer.output_dir
clip_grad_norm
compile
dataset._component_
dataset.packed
device
dtype
enable_activation_checkpointing
enable_activation_offloading
epochs
gradient_accumulation_steps
log_every_n_steps
log_peak_memory_stats
loss._component_
lr
max_steps_per_epoch
metric_logger._component_
metric_logger.log_dir
metric_logger.name
metric_logger.project
model._component_
optimizer._component_
optimizer.fused
optimizer.lr
optimizer_in_bwd
output_dir
profiler._component_
profiler.active_steps
profiler.cpu
profiler.cuda
profiler.enabled
profiler.num_cycles
profiler.output_dir
profiler.profile_memory
profiler.record_shapes
profiler.wait_steps
profiler.warmup_steps
profiler.with_flops
profiler.with_stack
Finished
-
ebs
2m 42s
-
2
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
100
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora/logs
clip-grad-norm-rank0
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.0003
-
/tmp/torchtune/llama3_2_1B/lora
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
2m 36s
-
2
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
100
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora/logs
clip-grad-norm-full-tensor
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.0003
-
/tmp/torchtune/llama3_2_1B/lora
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora/profiling_outputs
false
true
5
3
false
false
Crashed
-
ebs
28s
-
2
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
250
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora/logs
-
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.0003
-
/tmp/torchtune/llama3_2_1B/lora
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora/profiling_outputs
false
true
5
3
false
false
Crashed
-
ebs
1m
-
2
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
250
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora/logs
-
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.0003
-
/tmp/torchtune/llama3_2_1B/lora
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora/profiling_outputs
false
true
5
3
false
false
Killed
-
ebs
1m 34s
-
2
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
250
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora/logs
-
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.0003
-
/tmp/torchtune/llama3_2_1B/lora
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
5m 31s
-
2
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
250
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora/logs
llama32-1b-lora-distributed-unscaled-largelr-cosscheduler
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.01
-
/tmp/torchtune/llama3_2_1B/lora
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
5m 37s
-
2
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
250
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora/logs
llama32-1b-lora-distributed-largescaler-largelr-cosscheduler
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.01
-
/tmp/torchtune/llama3_2_1B/lora
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
5m 50s
-
2
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
250
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora/logs
llama32-1b-lora-distributed-largescaler-largelr
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.01
-
/tmp/torchtune/llama3_2_1B/lora
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
5m 49s
-
2
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
250
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora/logs
llama32-1b-lora-distributed-noscaler-largelr
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.01
-
/tmp/torchtune/llama3_2_1B/lora
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora/profiling_outputs
false
true
5
3
false
false
Killed
-
ebs
5m 43s
-
4
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora_single_device
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
500
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora_single_device/logs
llama32-1b-lora-single-noscaler-largelr
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.01
-
/tmp/torchtune/llama3_2_1B/lora_single_device
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora_single_device/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
7m 46s
-
4
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora_single_device
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
500
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora_single_device/logs
llama32-1b-lora-single-largescaler-largelr
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.01
-
/tmp/torchtune/llama3_2_1B/lora_single_device
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora_single_device/profiling_outputs
false
true
5
3
false
false
Failed
-
ebs
4s
-
4
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora_single_device
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
500
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora_single_device/logs
llama32-1b-lora-single-largescaler-largelr
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.01
-
/tmp/torchtune/llama3_2_1B/lora_single_device
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora_single_device/profiling_outputs
false
true
5
3
false
false
Failed
-
ebs
1s
-
4
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/lora_single_device
inf
false
torchtune.datasets.alpaca_cleaned_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
500
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/lora_single_device/logs
llama32-1b-lora-single-largescaler-largelr
test-2172
torchtune.models.llama3_2.lora_llama3_2_1b
torch.optim.AdamW
true
0.01
-
/tmp/torchtune/llama3_2_1B/lora_single_device
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/lora_single_device/profiling_outputs
false
true
5
3
false
false
Failed
-
ebs
6s
-
4
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/full_single_device
inf
false
torchtune.datasets.alpaca_dataset
false
cuda
bf16
false
false
1
1
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
100
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/full_single_device/logs
llama32-1b-singledevice-scaled
test-2172
torchtune.models.llama3_2.llama3_2_1b
torch.optim.AdamW
true
0.00002
false
/tmp/torchtune/llama3_2_1B/full_single_device
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/full_single_device/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
23s
-
4
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/full_single_device
inf
false
torchtune.datasets.alpaca_dataset
false
cuda
bf16
false
false
1
1
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
100
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/full_single_device/logs
llama32-1b-singledevice-unscaled
test-2172
torchtune.models.llama3_2.llama3_2_1b
torch.optim.AdamW
true
0.00002
false
/tmp/torchtune/llama3_2_1B/full_single_device
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/full_single_device/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
20s
-
4
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/full_single_device
inf
false
torchtune.datasets.alpaca_dataset
false
cuda
bf16
false
false
1
1
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
100
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/full_single_device/logs
llama32-1b-singledevice-unscaled
test-2172
torchtune.models.llama3_2.llama3_2_1b
torch.optim.AdamW
true
0.00002
false
/tmp/torchtune/llama3_2_1B/full_single_device
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/full_single_device/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
4m 28s
-
4
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/full
inf
false
torchtune.datasets.alpaca_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
1000
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/full/logs
llama32-1b-large-scaler-adam-unfused
test-2172
torchtune.models.llama3_2.llama3_2_1b
torch.optim.AdamW
false
0.00002
false
/tmp/torchtune/llama3_2_1B/full
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/full/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
4m 26s
-
4
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/full
inf
false
torchtune.datasets.alpaca_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
1000
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/full/logs
llama32-1b-baseline-adam-unfused
test-2172
torchtune.models.llama3_2.llama3_2_1b
torch.optim.AdamW
false
0.00002
false
/tmp/torchtune/llama3_2_1B/full
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/full/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
4m 17s
-
4
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/full
inf
false
torchtune.datasets.alpaca_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
1000
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/full/logs
llama32-1b-large-scaler-sgd
test-2172
torchtune.models.llama3_2.llama3_2_1b
torch.optim.SGD
-
0.00002
false
/tmp/torchtune/llama3_2_1B/full
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/full/profiling_outputs
false
true
5
3
false
false
Finished
-
ebs
4m 18s
-
4
torchtune.training.FullModelHFCheckpointer
/tmp/Llama-3.2-1B-Instruct/
["model.safetensors"]
LLAMA3_2
/tmp/torchtune/llama3_2_1B/full
inf
false
torchtune.datasets.alpaca_dataset
false
cuda
bf16
false
false
1
8
1
true
torchtune.modules.loss.CEWithChunkedOutputLoss
-
1000
torchtune.training.metric_logging.WandBLogger
/tmp/torchtune/llama3_2_1B/full/logs
llama32-1b-baseline-sgd
test-2172
torchtune.models.llama3_2.llama3_2_1b
torch.optim.SGD
-
0.00002
false
/tmp/torchtune/llama3_2_1B/full
torchtune.training.setup_torch_profiler
2
true
true
false
1
/tmp/torchtune/llama3_2_1B/full/profiling_outputs
false
true
5
3
false
false
1-20
of 23