Banyan's workspace
Runs
63
Name
14 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
activation_checkpoint.early_stop
activation_checkpoint.mode
activation_checkpoint.per_op_sac_force_recompute_mm_shapes_by_fqns
activation_checkpoint.selective_ac_option
checkpoint.async_mode
checkpoint.create_seed_checkpoint
checkpoint.enable
checkpoint.enable_first_step_checkpoint
checkpoint.exclude_from_loading
checkpoint.export_dtype
checkpoint.folder
checkpoint.initial_load_in_hf
checkpoint.initial_load_model_only
checkpoint.interval
checkpoint.keep_latest_k
checkpoint.last_save_in_hf
checkpoint.last_save_model_only
checkpoint.load_step
comm.init_timeout_seconds
comm.save_traces_folder
comm.trace_buf_size
comm.train_timeout_seconds
compile.components
compile.enable
experimental.custom_args_module
experimental.custom_import
fault_tolerance.enable
fault_tolerance.group_size
fault_tolerance.min_replica_size
fault_tolerance.process_group
fault_tolerance.process_group_timeout_ms
fault_tolerance.replica_id
float8.emulate
float8.enable_fsdp_float8_all_gather
float8.filter_fqns
float8.moe_fqns_prototype
float8.precompute_float8_dynamic_scale_for_fsdp
float8.recipe_name
job.config_file
job.dump_folder
job.print_args
job.use_for_integration_test
lr_scheduler.decay_type
lr_scheduler.min_lr_factor
Killed
banyan
2d 18h 37m 56s
-
-
selective
["moe.router.gate"]
2
disabled
false
true
false
[]
float32
checkpoint
false
true
1100
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
2d 19m 39s
-
-
selective
["moe.router.gate"]
2
disabled
false
true
false
[]
float32
checkpoint
false
true
1100
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
2d 3h 53m 41s
-
-
selective
["moe.router.gate"]
1
disabled
false
true
false
[]
float32
checkpoint
false
true
1100
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
3h 13m 11s
-
-
selective
["moe.router.gate"]
3
disabled
false
true
false
[]
float32
checkpoint
false
true
2000
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
1h 9m 35s
-
-
selective
["moe.router.gate"]
2
disabled
false
true
false
[]
float32
checkpoint
false
true
600
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
10h 15m 1s
-
-
selective
["moe.router.gate"]
1
disabled
false
true
false
[]
float32
checkpoint
false
true
600
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
1h 45m 51s
-
-
full
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
3h 47m 49s
-
-
full
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
6h 54m 11s
-
-
full
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
19h 1m 53s
-
-
full
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
13h 58m 49s
-
-
full
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
1d 2m 21s
-
-
full
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
1h 55m 15s
-
-
full
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
1d 14h 11m 51s
-
-
full
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
37m 23s
-
-
full
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
17m 39s
-
-
selective
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
5h 49m 18s
-
-
selective
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Killed
banyan
2m 13s
-
-
selective
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Finished
banyan
1m 32s
-
-
selective
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
Finished
banyan
14s
-
-
selective
["moe.router.gate"]
op
disabled
false
true
false
[]
float32
checkpoint
false
true
500
10
false
false
-1
300
comm_traces
20000
100
["model","loss"]
true
false
0
1
gloo
10000
0
false
false
["output"]
[]
false
rowwise
./torchtitan/models/llama3/train_configs/banyan_5b_deep.toml
./outputs
false
false
linear
0
1-20
of 63