Husein-mesolitica's workspace
Runs
6
State
Notes
User
Tags
Created
Runtime
Sweep
_name_or_path
adafactor
adam_beta1
adam_beta2
adam_epsilon
add_cross_attention
architectures
auto_find_batch_size
auto_map.AutoModel
bf16
bf16_full_eval
chunk_size_feed_forward
d_model
dataloader_drop_last
dataloader_num_workers
dataloader_pin_memory
ddp_timeout
debug
deepspeed.bf16.enabled
deepspeed.comms_logger.debug
deepspeed.comms_logger.enabled
deepspeed.fp16.enabled
deepspeed.fp16.hysteresis
deepspeed.fp16.initial_scale_power
deepspeed.fp16.loss_scale
deepspeed.fp16.loss_scale_window
deepspeed.fp16.min_loss_scale
deepspeed.gradient_accumulation_steps
deepspeed.gradient_clipping
deepspeed.optimizer.params.betas
deepspeed.optimizer.params.eps
deepspeed.optimizer.params.lr
deepspeed.optimizer.params.weight_decay
deepspeed.optimizer.type
deepspeed.scheduler.params.total_num_steps
deepspeed.scheduler.params.warmup_max_lr
deepspeed.scheduler.params.warmup_min_lr
deepspeed.scheduler.params.warmup_num_steps
deepspeed.scheduler.type
deepspeed.steps_per_print
deepspeed.train_batch_size
deepspeed.train_micro_batch_size_per_gpu
deepspeed.wall_clock_breakdown
deepspeed.zero_optimization.contiguous_gradients
Crashed
-
husein-mesolitica
1d 2h 28m 57s
-
huseinzol05/dummy-mamba-1.4b
false
0.9
0.999
1.0000e-8
false
["MambaLMHeadModel"]
false
huseinzol05/dummy-mamba-1.4b--modeling.MambaLMHeadModel
true
false
0
2048
false
0
true
1800
[]
auto
true
true
auto
2
16
0
1000
1
auto
auto
auto
auto
auto
auto
AdamW
auto
auto
auto
auto
WarmupDecayLR
2000
auto
auto
false
true
Crashed
-
husein-mesolitica
13h 51m 40s
-
huseinzol05/dummy-mamba-1.4b
false
0.9
0.999
1.0000e-8
false
["MambaLMHeadModel"]
false
huseinzol05/dummy-mamba-1.4b--modeling.MambaLMHeadModel
true
false
0
2048
false
0
true
1800
[]
auto
true
true
auto
2
16
0
1000
1
auto
auto
auto
auto
auto
auto
AdamW
auto
auto
auto
auto
WarmupDecayLR
2000
auto
auto
false
true
Crashed
-
husein-mesolitica
17h 16m 33s
-
huseinzol05/dummy-mamba-1.4b
false
0.9
0.999
1.0000e-8
false
["MambaLMHeadModel"]
false
huseinzol05/dummy-mamba-1.4b--modeling.MambaLMHeadModel
true
false
0
2048
false
0
true
1800
[]
auto
true
true
auto
2
16
0
1000
1
auto
auto
auto
auto
auto
auto
AdamW
auto
auto
auto
auto
WarmupDecayLR
2000
auto
auto
false
true
Crashed
-
husein-mesolitica
3h 4m 1s
-
huseinzol05/dummy-mamba-1.4b
false
0.9
0.999
1.0000e-8
false
["MambaLMHeadModel"]
false
huseinzol05/dummy-mamba-1.4b--modeling.MambaLMHeadModel
true
false
0
2048
false
0
true
1800
[]
auto
true
true
auto
2
16
0
1000
1
auto
auto
auto
auto
auto
auto
AdamW
auto
auto
auto
auto
WarmupDecayLR
2000
auto
auto
false
true
Crashed
-
husein-mesolitica
13h 26m 33s
-
huseinzol05/dummy-mamba-1.4b
false
0.9
0.999
1.0000e-8
false
["MambaLMHeadModel"]
false
huseinzol05/dummy-mamba-1.4b--modeling.MambaLMHeadModel
true
false
0
2048
false
0
true
1800
[]
auto
true
true
auto
2
16
0
1000
1
auto
auto
auto
auto
auto
auto
AdamW
auto
auto
auto
auto
WarmupDecayLR
2000
auto
auto
false
true
Crashed
-
husein-mesolitica
1h 18m 55s
-
huseinzol05/dummy-mamba-1.4b
false
0.9
0.999
1.0000e-8
false
["MambaLMHeadModel"]
false
huseinzol05/dummy-mamba-1.4b--modeling.MambaLMHeadModel
true
false
0
2048
false
0
true
1800
[]
auto
true
true
auto
2
16
0
1000
1
auto
auto
auto
auto
auto
auto
AdamW
auto
auto
auto
auto
WarmupDecayLR
2000
auto
auto
false
true
1-6
of 6