Akshaykalkunte's workspace
Runs
1,096
Name
3 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
batch.batch_size
batch.breadth_first_micro_batches
batch.depth_first_micro_batches
batch.micro_batch_size
batch.micro_sequence_length
batch.sequence_length
batch.sequential_micro_batches
data.fim.rate
data.format
data.path
data.split
model.base_model.cross_entropy_impl
model.base_model.init_method_std_embed
model.base_model.tie_word_embeddings
model.base_model.transformer.activation_type
model.base_model.transformer.add_linear_biases
model.base_model.transformer.ffn_hidden_size
model.base_model.transformer.gated
model.base_model.transformer.head_groups
model.base_model.transformer.hidden_size
model.base_model.transformer.init_method_std
model.base_model.transformer.init_method_std_attn_proj
model.base_model.transformer.init_method_std_mlp_1
model.base_model.transformer.init_method_std_mlp_2
model.base_model.transformer.init_method_std_qkv
model.base_model.transformer.kv_channels
model.base_model.transformer.mlp_lr_scale
model.base_model.transformer.normalization.type
model.base_model.transformer.num_attention_heads
model.base_model.transformer.num_layers
model.base_model.transformer.rotary.theta
model.base_model.transformer.rotary.type
model.base_model.use_position_embeddings
model.base_model.vocab_size
model.distributed.distributed_timeout
model.distributed.local_world_size
model.distributed.rank
model.distributed.seed
model.distributed.training_dtype
model.distributed.world_size
model.multi_stage.num_grad_buffers
model.multi_stage.num_weight_buffers
model.multi_stage.zero_stage
optimizer.beta_2
Running
Add notes...
tscholak
2d 18h 58m 16s
-
64
1
8
1
-
16384
8
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
0
rms_norm
32
50
1000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
Failed
Add notes...
tscholak
13m 22s
-
64
1
8
1
-
16384
8
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1
rms_norm
32
50
1000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
Failed
Add notes...
tscholak
3d 2h 8m 49s
-
64
1
8
1
-
16384
8
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1
rms_norm
32
50
1000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
Running
Add notes...
tscholak
3d 10h 53m 27s
-
64
1
8
1
-
16384
8
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
0
rms_norm
32
50
1000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
Running
Add notes...
tscholak
3d 11h 18m 10s
-
64
1
8
1
-
16384
8
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1
rms_norm
32
50
1000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
Crashed
Add notes...
raymondli
1h 56m 16s
-
16
1
8
2
-
8192
8
-
-
-
[969,30,1]
fused
-
false
silu
false
4096
true
8
2048
-
-
-
-
-
128
1.0000e-12
rms_norm
16
4
1000000000
default
false
131072
-
1
0
984060
bfloat16
1
-
-
3
0.95
Finished
Add notes...
tscholak
30m 52s
-
256
1
16
1
-
16384
16
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1.0000e-12
rms_norm
32
48
1000000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
Finished
Add notes...
tscholak
1h 25m 24s
-
256
1
16
1
-
16384
16
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1.0000e-12
rms_norm
32
48
1000000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
Finished
Add notes...
tscholak
16m 34s
-
256
1
2
8
-
2048
2
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1.0000e-12
rms_norm
32
48
1000000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
Finished
Add notes...
tscholak
1h 28m 50s
-
256
1
16
1
-
16384
16
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1.0000e-12
rms_norm
32
48
1000000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
Finished
Add notes...
tscholak
36m 32s
-
256
1
16
1
-
2048
16
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1.0000e-12
rms_norm
32
48
1000000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
Failed
Add notes...
tscholak
1h 18m 9s
-
128
1
32
1
-
2048
32
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1
rms_norm
32
50
1000000
default
false
131072
-
8
0
984060
bfloat16
16
-
-
3
0.95
Failed
Add notes...
tscholak
1h 19m 55s
-
128
1
32
1
-
2048
32
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1
rms_norm
32
50
1000000
default
false
131072
-
8
0
984060
bfloat16
16
-
-
3
0.95
Failed
Add notes...
tscholak
1h 39m 23s
-
128
1
64
1
-
2048
64
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1
rms_norm
32
50
1000000
default
false
131072
-
8
0
984060
bfloat16
16
-
-
3
0.95
Finished
Add notes...
raymondli
1d 18h 36m 1s
-
16
1
8
2
-
8192
8
-
-
-
[969,30,1]
fused
-
false
silu
false
4096
true
8
2048
-
-
-
-
-
128
1.0000e-12
rms_norm
16
4
1000000000
default
false
131072
-
1
0
984060
bfloat16
1
-
-
3
0.95
Finished
Add notes...
raymondli
22h 30m 44s
-
16
1
8
1
-
8192
8
-
-
-
[969,30,1]
fused
-
false
silu
false
4096
true
8
1024
-
-
-
-
-
128
1.0000e-12
rms_norm
8
4
1000000
default
false
131072
-
2
0
984060
bfloat16
2
-
-
3
0.95
Finished
Add notes...
raymondli
1h 24m 34s
-
16
1
8
1
-
4096
8
-
-
-
[969,30,1]
fused
-
false
silu
false
4096
true
8
1024
-
-
-
-
-
128
1.0000e-12
rms_norm
8
4
1000000
default
false
131072
-
2
0
984060
bfloat16
2
-
-
3
0.95
Finished
Add notes...
tscholak
3d 21h 49m 12s
-
256
1
16
1
-
16384
16
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1.0000e-12
rms_norm
32
48
1000000000
default
false
131072
-
8
0
12345
bfloat16
64
-
-
3
0.95
Failed
Add notes...
tscholak
3d 1h 6m 55s
-
64
1
8
1
-
16384
8
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
0
rms_norm
32
50
1000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
Crashed
Add notes...
tscholak
3d 1h 7m 47s
-
64
1
8
1
-
16384
8
-
file
["???"]
[969,30,1]
fused
-
false
silu
false
14336
true
8
5120
-
-
-
-
-
128
1
rms_norm
32
50
1000000
default
false
131072
-
8
0
984060
bfloat16
64
-
-
3
0.95
1-20
of 1,096