Andreaskoepf's workspace
Runs
44
Name
2 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
DDP_impl
accumulate_allreduce_grads_in_fp32
adam_beta1
adam_beta2
adam_eps
adlr_autoresume
adlr_autoresume_interval
apply_query_key_layer_scaling
apply_residual_connection_post_layernorm
async_tensor_model_parallel_allreduce
attention_dropout
attention_softmax_in_fp32
barrier_with_L1_time
bf16
bias_dropout_fusion
bias_gelu_fusion
biencoder_projection_dim
biencoder_shared_query_context_model
classes_fraction
clip_grad
consumed_train_samples
consumed_valid_samples
data_impl
data_parallel_random_init
data_parallel_size
data_path
data_per_class_fraction
data_sharding
dataloader_type
dino_bottleneck_size
dino_freeze_last_layer
dino_head_hidden_size
dino_local_crops_number
dino_local_img_size
dino_norm_last_layer
dino_teacher_temp
dino_warmup_teacher_temp
dino_warmup_teacher_temp_epochs
distribute_saved_activations
distributed_backend
empty_unused_memory_level
encoder_num_layers
encoder_seq_length
end_weight_decay
Crashed
-
andreaskoepf
10h 7m 51s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/orca_megacode_oasst_best_codellama/orca_megacode_oasst_best-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
40
4096
0.000001
Finished
-
andreaskoepf
23m 55s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
true
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
4
["/pure-mlo-scratch/akoepf/data/oasst_top1_2023-07-23_llama2/oasst_top1-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
32
1024
0.000001
Crashed
-
andreaskoepf
11h 25m 41s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/oasst_top1_2023-07-23_llama2/oasst_top1-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
40
1024
0.000001
Finished
-
andreaskoepf
37m 40s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
true
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
4
["/pure-mlo-scratch/akoepf/data/oasst_top1_2023-07-23_llama2/oasst_top1-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
32
1024
0.000001
Finished
-
andreaskoepf
9h 57m 3s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/orcamegacode_best_llama2/orcamegacode_best-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
40
4096
0.000001
Crashed
-
andreaskoepf
20m 20s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/orcamegacode_best_llama2/orcamegacode_best-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
40
4096
0.000001
Crashed
-
andreaskoepf
1h 12m 47s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/oasst_top1_2023-07-23_llama2/oasst_top1-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
80
4096
0.000001
Crashed
-
andreaskoepf
35m 17s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/oasst_pre10_min25_llama2/oasst_sft10-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
80
4096
0.000001
Crashed
-
andreaskoepf
7h 43m 50s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/oasst_pre10_min25_llama2/oasst_sft10-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
40
4096
0.000001
Finished
-
andreaskoepf
23m 43s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
true
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
4
["/pure-mlo-scratch/akoepf/data/oasst_top1_2023-07-23_falcon/oasst_top1-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
32
1024
0.000001
Crashed
-
andreaskoepf
1d 55m 32s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/megacode3_min100_llama2/megacode3-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
40
4096
0.000001
Finished
-
andreaskoepf
12h 22m 2s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/orcabest_min50_llama2/orcabest-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
40
4096
0.000001
Finished
-
andreaskoepf
1h 30m 17s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/oasst_top1_2023-07-23_falcon/oasst_top1-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
60
2048
0.000001
Finished
-
andreaskoepf
20h 32m 22s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/megacode2_min100_falcon/megacode2-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
60
2048
0.000001
Finished
-
andreaskoepf
20m 3s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
true
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
4
["/pure-mlo-scratch/akoepf/data/oasst_top1_2023-07-23_falcon/oasst_top1-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
32
1024
0.000001
Finished
-
andreaskoepf
11h 45m 17s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/megacode2_min100/megacode2-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
40
4096
0.000001
Crashed
-
andreaskoepf
1h 10m 31s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
1
["/pure-mlo-scratch/akoepf/data/megacode2_min100/megacode2-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
40
4096
0.000001
Finished
-
andreaskoepf
32m 44s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
2
["/pure-mlo-scratch/akoepf/data/llama_oasst_top1_2023-07-23/oasst_top1-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
32
4096
0.000001
Crashed
-
andreaskoepf
2m 43s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
2
["/pure-mlo-scratch/akoepf/data/megacode2_frac05/megacode2-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
32
4096
0.000001
Killed
-
andreaskoepf
1m 32s
-
local
true
0.9
0.95
1.0000e-12
false
1000
true
false
false
0
false
true
true
false
false
0
false
1
1
0
0
infer
false
2
["/pure-mlo-scratch/akoepf/data/megacode2_frac05/megacode2-train"]
1
true
single
256
1
2048
10
96
false
0.07
0.04
30
false
nccl
0
32
4096
0.000001
1-20
of 44