test-2172 Table – Weights & Biases

Ebs's workspace

Runs

Finished

ebs

8mo ago

2m 42s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

100

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora/logs

clip-grad-norm-rank0

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.0003

/tmp/torchtune/llama3_2_1B/lora

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora/profiling_outputs

false

true

false

Finished

ebs

8mo ago

2m 36s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

100

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora/logs

clip-grad-norm-full-tensor

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.0003

/tmp/torchtune/llama3_2_1B/lora

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora/profiling_outputs

false

true

false

Crashed

ebs

8mo ago

28s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

250

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora/logs

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.0003

/tmp/torchtune/llama3_2_1B/lora

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora/profiling_outputs

false

true

false

Crashed

ebs

8mo ago

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

250

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora/logs

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.0003

/tmp/torchtune/llama3_2_1B/lora

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora/profiling_outputs

false

true

false

Killed

ebs

8mo ago

1m 34s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

250

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora/logs

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.0003

/tmp/torchtune/llama3_2_1B/lora

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora/profiling_outputs

false

true

false

Finished

ebs

8mo ago

5m 31s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

250

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora/logs

llama32-1b-lora-distributed-unscaled-largelr-cosscheduler

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.01

/tmp/torchtune/llama3_2_1B/lora

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora/profiling_outputs

false

true

false

Finished

ebs

8mo ago

5m 37s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

250

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora/logs

llama32-1b-lora-distributed-largescaler-largelr-cosscheduler

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.01

/tmp/torchtune/llama3_2_1B/lora

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora/profiling_outputs

false

true

false

Finished

ebs

8mo ago

5m 50s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

250

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora/logs

llama32-1b-lora-distributed-largescaler-largelr

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.01

/tmp/torchtune/llama3_2_1B/lora

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora/profiling_outputs

false

true

false

Finished

ebs

8mo ago

5m 49s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

250

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora/logs

llama32-1b-lora-distributed-noscaler-largelr

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.01

/tmp/torchtune/llama3_2_1B/lora

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora/profiling_outputs

false

true

false

Killed

ebs

8mo ago

5m 43s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora_single_device

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

500

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora_single_device/logs

llama32-1b-lora-single-noscaler-largelr

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.01

/tmp/torchtune/llama3_2_1B/lora_single_device

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora_single_device/profiling_outputs

false

true

false

Finished

ebs

8mo ago

7m 46s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora_single_device

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

500

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora_single_device/logs

llama32-1b-lora-single-largescaler-largelr

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.01

/tmp/torchtune/llama3_2_1B/lora_single_device

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora_single_device/profiling_outputs

false

true

false

Failed

ebs

8mo ago

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora_single_device

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

500

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora_single_device/logs

llama32-1b-lora-single-largescaler-largelr

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.01

/tmp/torchtune/llama3_2_1B/lora_single_device

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora_single_device/profiling_outputs

false

true

false

Failed

ebs

8mo ago

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/lora_single_device

inf

false

torchtune.datasets.alpaca_cleaned_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

500

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/lora_single_device/logs

llama32-1b-lora-single-largescaler-largelr

test-2172

torchtune.models.llama3_2.lora_llama3_2_1b

torch.optim.AdamW

true

0.01

/tmp/torchtune/llama3_2_1B/lora_single_device

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/lora_single_device/profiling_outputs

false

true

false

Failed

ebs

8mo ago

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/full_single_device

inf

false

torchtune.datasets.alpaca_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

100

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/full_single_device/logs

llama32-1b-singledevice-scaled

test-2172

torchtune.models.llama3_2.llama3_2_1b

torch.optim.AdamW

true

0.00002

false

/tmp/torchtune/llama3_2_1B/full_single_device

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/full_single_device/profiling_outputs

false

true

false

Finished

ebs

8mo ago

23s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/full_single_device

inf

false

torchtune.datasets.alpaca_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

100

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/full_single_device/logs

llama32-1b-singledevice-unscaled

test-2172

torchtune.models.llama3_2.llama3_2_1b

torch.optim.AdamW

true

0.00002

false

/tmp/torchtune/llama3_2_1B/full_single_device

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/full_single_device/profiling_outputs

false

true

false

Finished

ebs

8mo ago

20s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/full_single_device

inf

false

torchtune.datasets.alpaca_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

100

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/full_single_device/logs

llama32-1b-singledevice-unscaled

test-2172

torchtune.models.llama3_2.llama3_2_1b

torch.optim.AdamW

true

0.00002

false

/tmp/torchtune/llama3_2_1B/full_single_device

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/full_single_device/profiling_outputs

false

true

false

Finished

ebs

8mo ago

4m 28s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/full

inf

false

torchtune.datasets.alpaca_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

1000

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/full/logs

llama32-1b-large-scaler-adam-unfused

test-2172

torchtune.models.llama3_2.llama3_2_1b

torch.optim.AdamW

false

0.00002

false

/tmp/torchtune/llama3_2_1B/full

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/full/profiling_outputs

false

true

false

Finished

ebs

8mo ago

4m 26s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/full

inf

false

torchtune.datasets.alpaca_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

1000

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/full/logs

llama32-1b-baseline-adam-unfused

test-2172

torchtune.models.llama3_2.llama3_2_1b

torch.optim.AdamW

false

0.00002

false

/tmp/torchtune/llama3_2_1B/full

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/full/profiling_outputs

false

true

false

Finished

ebs

8mo ago

4m 17s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/full

inf

false

torchtune.datasets.alpaca_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

1000

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/full/logs

llama32-1b-large-scaler-sgd

test-2172

torchtune.models.llama3_2.llama3_2_1b

torch.optim.SGD

0.00002

false

/tmp/torchtune/llama3_2_1B/full

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/full/profiling_outputs

false

true

false

Finished

ebs

8mo ago

4m 18s

torchtune.training.FullModelHFCheckpointer

/tmp/Llama-3.2-1B-Instruct/

["model.safetensors"]

LLAMA3_2

/tmp/torchtune/llama3_2_1B/full

inf

false

torchtune.datasets.alpaca_dataset

false

cuda

bf16

false

true

torchtune.modules.loss.CEWithChunkedOutputLoss

1000

torchtune.training.metric_logging.WandBLogger

/tmp/torchtune/llama3_2_1B/full/logs

llama32-1b-baseline-sgd

test-2172

torchtune.models.llama3_2.llama3_2_1b

torch.optim.SGD

0.00002

false

/tmp/torchtune/llama3_2_1B/full

torchtune.training.setup_torch_profiler

true

false

/tmp/torchtune/llama3_2_1B/full/profiling_outputs

false

true

false

1-20

of 23