Jiayipan's workspace
Runs
90
Name
3 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
activation_dtype
bsize
checkpointer_config
eval_bsize
eval_data_path
experiment_id
hf_repo_id
hostname
inputs_tokenizer_override
jax_distributed_initalize_config
load_model
log_freq
log_initial_step
logger_config
logger_output_dir
max_length
model_config_override
num_eval_steps
num_train_steps
optim_config
output_dir
outputs_tokenizer_override
param_dtype
physical_axis_splitting
save_initial_checkpoint
save_model_freq
sharding
shuffle_train_data
train_data_path
wandb_project
accuracy
eval_accuracy
eval_loss
gradient_norm
learning_rate
loss
param_norm
step
Finished
-
jiayipan
14h 47m 18s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
256
gcs://jiayi-eu/data/v3_n5/val_hsp_num_subcall_cond.json
llama-200m-hsp-v3n5-b6subb8_th0_9-8k-fla_num_subcall_cond--5f602089887e472fa0f3bdbd2f4fd413-35508f36459111f08e6342010aa400b3
LM-Parallel/llama-hsp-v3n5-b6subb8_th0_9-8k-fla_num_subcall_cond
t1v-n-22be758f-w-2
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m-8k-fla"
}
100
true
{"online":true,"prefix":"llama-200m-hsp-v3n5-b6subb8_th0_9-8k-fla_num_subcall_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3_n5-b6_subb8_th0_9-ctx8k_num_subcall_cond-fla/llama-200m-hsp-v3n5-b6subb8_th0_9-8k-fla_num_subcall_cond--5f602089887e472fa0f3bdbd2f4fd413-35508f36459111f08e6342010aa400b3
8192
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3_n5-b6_subb8_th0_9-ctx8k_num_subcall_cond-fla/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3_n5/train_hsp_beam6_subbeam8_prom0_9_num_token_cond.json
sos
0.98668
0.97644
0.08808
0.029484
5.0000e-7
0.02677
397.18353
19000
Finished
-
jiayipan
9h 41m 10s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
256
gcs://jiayi-eu/data/v3_n5/val_hs_num_token_cond.json
llama-200m-hs-v3n5-b5-8k-fla_num_token_cond--e440400f76004f6480af7b4081b4df65-15a462c241de11f0b36242010aa400b3
LM-Parallel/llama-hs-v3n5-b5-8k-fla_num_token_cond
t1v-n-22be758f-w-2
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m-8k-fla"
}
100
true
{"online":true,"prefix":"llama-200m-hs-v3n5-b5-8k-fla_num_token_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3_n5-b5-ctx8k_num_token_cond-fla/llama-200m-hs-v3n5-b5-8k-fla_num_token_cond--e440400f76004f6480af7b4081b4df65-15a462c241de11f0b36242010aa400b3
8192
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3_n5-b5-ctx8k_num_token_cond-fla/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3_n5/train_hs_beam5_num_token_cond.json
sos
0.99026
0.98638
0.057383
0.027306
5.0000e-7
0.020729
397.4628
19000
Finished
-
jiayipan
20h 41m 51s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
256
gcs://jiayi-eu/data/v3_n5/val_hsp_num_subcall_cond.json
llama-600m-hsp-v3n5-b10subb5_th0_6-8k_num_subcall_cond--c758c41214c540da9ec08445200de11c-92acc48c412d11f08aeb42010aa400b3
LM-Parallel/llama-hsp-600m-v3_n5-b10subb5_th0_6-8k_num_subcall_cond
t1v-n-22be758f-w-2
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "600m-8k"
}
100
true
{"online":true,"prefix":"llama-600m-hsp-v3n5-b10subb5_th0_6-8k_num_subcall_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-600m-v3_n5-b10_subb5_th0_6-ctx8k_500k_num_subcall_cond-fla/llama-600m-hsp-v3n5-b10subb5_th0_6-8k_num_subcall_cond--c758c41214c540da9ec08445200de11c-92acc48c412d11f08aeb42010aa400b3
8192
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-600m-v3_n5-b10_subb5_th0_6-ctx8k_500k_num_subcall_cond-fla/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3_n5/train_hsp_beam10_subbeam5_prom0_6_num_subcall_cond.json
sos
0.98574
0.9804
0.045354
0.026751
5.0000e-7
0.029463
549.14661
19000
Finished
-
jiayipan
14h 39m 9s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
256
gcs://jiayi-eu/data/v3_n5/val_hsp_num_subcall_cond.json
llama-200m-hsp-v3n5-b10subb5_th0_6-8k-fla_num_subcall_cond--9a9892dd0b1649548796f1eaf635dccd-f9988826412c11f0aa3042010aa4004e
LM-Parallel/llama-hsp-v3n5-b10subb5_th0_6-8k-fla_num_subcall_cond
t1v-n-f953bcdb-w-13
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m-8k-fla"
}
100
true
{"online":true,"prefix":"llama-200m-hsp-v3n5-b10subb5_th0_6-8k-fla_num_subcall_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3_n5-b10_subb5_th0_6-ctx8k_500k_num_subcall_cond-fla/llama-200m-hsp-v3n5-b10subb5_th0_6-8k-fla_num_subcall_cond--9a9892dd0b1649548796f1eaf635dccd-f9988826412c11f0aa3042010aa4004e
8192
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3_n5-b10_subb5_th0_6-ctx8k_500k_num_subcall_cond-fla/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3_n5/train_hsp_beam10_subbeam5_prom0_6_num_subcall_cond.json
sos
0.98546
0.97999
0.047042
0.032988
5.0000e-7
0.030275
397.29251
19000
Finished
-
jiayipan
10h 32m 44s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
256
gcs://jiayi-eu/data/v3/val_hsp_num_subcall_cond.json
llama-200m-hsp-v3-b10_subb25_th0_7-8k-fla_num_subcall_cond--de22ec3d1e88464c944f5d02b6096bd4-256b3dba3fe611f083a542010aa40091
LM-Parallel/llama-hsp-v3-b10_subb25_th0_7-8k-fla_num_subcall_cond
t1v-n-861f7285-w-4
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m-8k-fla"
}
100
true
{"online":true,"prefix":"llama-200m-hsp-v3-b10_subb25_th0_7-8k-fla_num_subcall_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-b10_subb25_th0_7-ctx8k_500k_num_subcall_cond-fla/llama-200m-hsp-v3-b10_subb25_th0_7-8k-fla_num_subcall_cond--de22ec3d1e88464c944f5d02b6096bd4-256b3dba3fe611f083a542010aa40091
8192
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-b10_subb25_th0_7-ctx8k_500k_num_subcall_cond-fla/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hsp_beam10_subbeam25_prom0_7_500k_num_subcall_cond.json
sos
0.97818
0.97256
0.083536
0.050738
5.0000e-7
0.047658
397.90881
19000
Finished
-
jiayipan
20h 2m 25s
-
fp32
128
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
256
gcs://jiayi-eu/data/v3/val_hsp_num_subcall_cond.json
llama-200m-hsp-v3-b10_subb20_th0_5-8k_num_subcall_cond--96fa166324e64a1685f0967f0f45c4fb-62556fca3faa11f0acf842010aa4004e
LM-Parallel/llama-hsp-v3-b10_subb20_th0_5-8k_num_subcall_cond
t1v-n-f953bcdb-w-13
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m-8k"
}
100
true
{"online":true,"prefix":"llama-200m-hsp-v3-b10_subb20_th0_5-8k_num_subcall_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-b10_subb20_prom0_5-ctx8k_500k_num_subcall_cond/llama-200m-hsp-v3-b10_subb20_th0_5-8k_num_subcall_cond--96fa166324e64a1685f0967f0f45c4fb-62556fca3faa11f0acf842010aa4004e
8192
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-b10_subb20_prom0_5-ctx8k_500k_num_subcall_cond/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hsp_beam10_subbeam20_prom0_5_500k_num_subcall_cond.json
sos
0.97831
0.97156
0.082397
0.048988
5.0000e-7
0.047695
398.59048
19000
Finished
-
jiayipan
10h 2m 22s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
256
gcs://jiayi-eu/data/v3/val_hsp_num_subcall_cond.json
llama-200m-hsp-v3-b10_subb20_th0_5-8k-fla_num_subcall_cond--b5914e478b7946fd99dcabf3f9bec8eb-50aedb763faa11f0979542010aa400b3
LM-Parallel/llama-hsp-v3-b10_subb20_th0_5-8k-fla_num_subcall_cond
t1v-n-22be758f-w-2
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m-8k-fla"
}
100
true
{"online":true,"prefix":"llama-200m-hsp-v3-b10_subb20_th0_5-8k-fla_num_subcall_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-b10_subb20_prom0_5-ctx8k_500k_num_subcall_cond-fla/llama-200m-hsp-v3-b10_subb20_th0_5-8k-fla_num_subcall_cond--b5914e478b7946fd99dcabf3f9bec8eb-50aedb763faa11f0979542010aa400b3
8192
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-b10_subb20_prom0_5-ctx8k_500k_num_subcall_cond-fla/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hsp_beam10_subbeam20_prom0_5_500k_num_subcall_cond.json
sos
0.97655
0.97158
0.080439
0.047455
5.0000e-7
0.051803
398.07639
19000
Finished
-
jiayipan
7h 16m 53s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
512
gcs://jiayi-eu/data/v3/val_hs_num_token_cond.json
llama-600m-hs-v3-num_token_cond--b18ce824841043b7acb5497e496f91d3-f2b050103dc711f08a4842010aa400b3
LM-Parallel/llama-600m-hs-v3-num_token_cond
t1v-n-22be758f-w-2
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "600m"
}
100
true
{"online":true,"prefix":"llama-600m-hs-v3-num_token_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-600m-hs-num_token_cond/llama-600m-hs-v3-num_token_cond--b18ce824841043b7acb5497e496f91d3-f2b050103dc711f08a4842010aa400b3
4096
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-600m-hs-num_token_cond/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hs_num_token_cond.json
sos
0.98759
0.98585
0.034482
0.03109
5.0000e-7
0.028342
550.30664
19000
Finished
-
jiayipan
7h 33m 10s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
512
gcs://jiayi-eu/data/v3/val_hsp_num_subcall_cond.json
llama-600m-hsp-v3-beam10_subbeam15_num_subcall_cond--2a3cda70572941c9a689e027c3abe595-22f4bd103d7311f0927342010aa400b3
LM-Parallel/llama-600m-hsp-v3-beam10_subbeam15_num_subcall_cond
t1v-n-22be758f-w-2
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "600m"
}
100
true
{"online":true,"prefix":"llama-600m-hsp-v3-beam10_subbeam15_num_subcall_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-600m-hsp-v3-beam10_subbeam15_num_subcall_cond/llama-600m-hsp-v3-beam10_subbeam15_num_subcall_cond--2a3cda70572941c9a689e027c3abe595-22f4bd103d7311f0927342010aa400b3
4096
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-600m-hsp-v3-beam10_subbeam15_num_subcall_cond/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hsp_beam10_subbeam15_num_subcall_cond.json
sos
0.98256
0.97496
0.059169
0.024415
5.0000e-7
0.037174
549.39691
19000
Finished
-
jiayipan
18h 38m 45s
-
fp32
128
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
256
gcs://jiayi-eu/data/v3/val_hs_ctx8192_num_token_cond.json
llama-200m-hs-v3-beam10-ctx8k-num_token_cond--437b9c444b614b15869e811ab3e3701e-057587543cfd11f0adb542010aa4004e
LM-Parallel/llama-hs-v3-beam10-ctx8k-num_token_cond
t1v-n-f953bcdb-w-13
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m-8k"
}
100
true
{"online":true,"prefix":"llama-200m-hs-v3-beam10-ctx8k-num_token_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3-beam10-ctx8k-num_token_cond/llama-200m-hs-v3-beam10-ctx8k-num_token_cond--437b9c444b614b15869e811ab3e3701e-057587543cfd11f0adb542010aa4004e
8192
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3-beam10-ctx8k-num_token_cond/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hs_beam10_ctx8192_num_token_cond.json
sos
0.98728
0.98222
0.070269
0.028397
5.0000e-7
0.025617
396.97855
19000
Failed
-
jiayipan
56s
-
fp32
128
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
256
gcs://jiayi-eu/data/v3/val_hs_ctx8192_num_token_cond.json
llama-200m-hs-v3-beam10-ctx8k-num_token_cond--5bec825818a54319944caa3439685731-729ef4203cfb11f0868942010aa4004e
LM-Parallel/llama-hs-v3-beam10-ctx8k-num_token_cond
t1v-n-f953bcdb-w-13
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m-8k"
}
100
true
{"online":true,"prefix":"llama-200m-hs-v3-beam10-ctx8k-num_token_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3-beam10-ctx8k-num_token_cond/llama-200m-hs-v3-beam10-ctx8k-num_token_cond--5bec825818a54319944caa3439685731-729ef4203cfb11f0868942010aa4004e
8192
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3-beam10-ctx8k-num_token_cond/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hs_beam10_ctx8192_num_token_cond.json
sos
-
-
-
-
-
-
-
-
Failed
-
jiayipan
30s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
512
gcs://jiayi-eu/data/v3/val_hs_ctx8192_num_token_cond.json
llama-200m-hs-v3-beam10-ctx8k-num_token_cond--cb11ecd5717e45fab9424dbdb2c7e885-79c2cff63cce11f08e8342010aa4004e
LM-Parallel/llama-hs-v3-beam10-ctx8k-num_token_cond
t1v-n-f953bcdb-w-13
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m-8k"
}
100
true
{"online":true,"prefix":"llama-200m-hs-v3-beam10-ctx8k-num_token_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3-beam10-ctx8k-num_token_cond/llama-200m-hs-v3-beam10-ctx8k-num_token_cond--cb11ecd5717e45fab9424dbdb2c7e885-79c2cff63cce11f08e8342010aa4004e
8192
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3-beam10-ctx8k-num_token_cond/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hs_beam10_ctx8192_num_token_cond.json
sos
-
-
-
-
-
-
-
-
Failed
-
jiayipan
10s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
512
gcs://jiayi-eu/data/v3/val_hs_ctx8192_num_token_cond.json
llama-200m-hs-v3-beam10-ctx8192-num_token_cond--c6dc041315dc4f41aaf621be1a498d9c-8e8d84ae3ccd11f0964d42010aa4004e
LM-Parallel/llama-hs-v3-beam10-ctx8192-num_token_cond
t1v-n-f953bcdb-w-13
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m"
}
100
true
{"online":true,"prefix":"llama-200m-hs-v3-beam10-ctx8192-num_token_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3-beam10-ctx8192-num_token_cond/llama-200m-hs-v3-beam10-ctx8192-num_token_cond--c6dc041315dc4f41aaf621be1a498d9c-8e8d84ae3ccd11f0964d42010aa4004e
8192
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3-beam10-ctx8192-num_token_cond/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hs_beam10_ctx8192_num_token_cond.json
sos
-
-
-
-
-
-
-
-
Finished
-
jiayipan
10h 51m 44s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
512
gcs://jiayi-eu/data/v3/val_hsp_num_subcall_cond.json
llama-200m-hsp-v3-b10_sub15_b15_5perc_num_subcall_cond--3ec688b7f7c0446d9f9f50fca2975743-94d600a01a6511f0bc7642010aa40065
LM-Parallel/llama-hsp-v3-beam10_subbeam15_500k_beam15_25k_num_subcall_cond
t1v-n-f953bcdb-w-8
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m"
}
100
true
{"online":true,"prefix":"llama-200m-hsp-v3-b10_sub15_b15_5perc_num_subcall_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam10_subbeam15_500k_beam15_25k_num_subcall_cond/llama-200m-hsp-v3-b10_sub15_b15_5perc_num_subcall_cond--3ec688b7f7c0446d9f9f50fca2975743-94d600a01a6511f0bc7642010aa40065
4096
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam10_subbeam15_500k_beam15_25k_num_subcall_cond/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hsp_beam10_subbeam15_500k_beam15_25k_num_subcall_cond.json
sos
0.9816
0.97447
0.060061
0.036486
5.0000e-7
0.039771
397.39514
19000
Finished
-
jiayipan
10h 51m 47s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
512
gcs://jiayi-eu/data/v3/val_hsp_num_subcall_cond.json
llama-200m-hsp-v3-b10_sub15_b15_1perc_num_subcall_cond--dddc015c29174c38b390805d4c04fee0-f4c94b7011e111f0979342010aa4004a
LM-Parallel/llama-hsp-v3-beam10_subbeam15_500k_beam15_5k_num_subcall_cond
t1v-n-f953bcdb-w-2
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m"
}
100
true
{"online":true,"prefix":"llama-200m-hsp-v3-b10_sub15_b15_1perc_num_subcall_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam10_subbeam15_500k_beam15_5k_num_subcall_cond/llama-200m-hsp-v3-b10_sub15_b15_1perc_num_subcall_cond--dddc015c29174c38b390805d4c04fee0-f4c94b7011e111f0979342010aa4004a
4096
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam10_subbeam15_500k_beam15_5k_num_subcall_cond/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hsp_beam10_subbeam15_500k_beam15_5k_num_subcall_cond.json
sos
0.98005
0.97452
0.059953
0.045706
5.0000e-7
0.043398
397.38687
19000
Finished
-
jiayipan
10h 32m 5s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
512
gcs://jiayi-eu/data/v3/val_hs.json
llama-200m-hs-v3-beam15--682df75bb5fa4befbc7f24b7419b646a-f1b1ffe411d811f0ac8d42010aa400b3
LM-Parallel/llama-hs-v3-beam15
t1v-n-22be758f-w-2
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m"
}
100
true
{"online":true,"prefix":"llama-200m-hs-v3-beam15","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3-beam15/llama-200m-hs-v3-beam15--682df75bb5fa4befbc7f24b7419b646a-f1b1ffe411d811f0ac8d42010aa400b3
4096
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hs-v3-beam15/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hs_beam15.json
sos
0.98637
0.98605
0.02927
0.022875
5.0000e-7
0.027131
396.72803
19000
Finished
-
jiayipan
10h 52m 20s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
512
gcs://jiayi-eu/data/v3/val_hsp.json
llama-200m-hsp-v3-beam20_subbeam10--9db9606cce904635aa03eac2539b53d0-a95248be097a11f084b842010aa4004a
LM-Parallel/llama-hsp-v3-beam20_subbeam10
t1v-n-f953bcdb-w-2
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m"
}
100
true
{"online":true,"prefix":"llama-200m-hsp-v3-beam20_subbeam10","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam20_subbeam10/llama-200m-hsp-v3-beam20_subbeam10--9db9606cce904635aa03eac2539b53d0-a95248be097a11f084b842010aa4004a
4096
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam20_subbeam10/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hsp_beam20_subbeam10.json
sos
0.98372
0.97309
0.062762
0.028686
5.0000e-7
0.033929
397.94052
19000
Finished
-
jiayipan
10h 51m 59s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
512
gcs://jiayi-eu/data/v3/val_hsp_num_subcall_cond.json
llama-200m-hsp-v3-beam20_subbeam10_num_subcall_cond--877b5e4d3d46421e97cab9a17ed7b37f-87012eb0097a11f0a87d42010aa400b5
LM-Parallel/llama-hsp-v3-beam20_subbeam10_num_subcall_cond
t1v-n-22be758f-w-8
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m"
}
100
true
{"online":true,"prefix":"llama-200m-hsp-v3-beam20_subbeam10_num_subcall_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam20_subbeam10_num_subcall_cond/llama-200m-hsp-v3-beam20_subbeam10_num_subcall_cond--877b5e4d3d46421e97cab9a17ed7b37f-87012eb0097a11f0a87d42010aa400b5
4096
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam20_subbeam10_num_subcall_cond/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hsp_beam20_subbeam10_num_subcall_cond.json
sos
0.98343
0.97387
0.062641
0.031165
5.0000e-7
0.035562
397.50021
19000
Finished
-
jiayipan
10h 51m 45s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
512
gcs://jiayi-eu/data/v3/val_hsp_num_subcall_cond.json
llama-200m-hsp-v3-beam10_subbeam15_num_subcall_cond--d24baf6e581d442b8cb15d8905030560-b89c0226085111f0844242010aa400b5
LM-Parallel/llama-hsp-v3-beam10_subbeam15_num_subcall_cond
t1v-n-22be758f-w-8
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m"
}
100
true
{"online":true,"prefix":"llama-200m-hsp-v3-beam10_subbeam15_num_subcall_cond","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam10_subbeam15_num_subcall_cond/llama-200m-hsp-v3-beam10_subbeam15_num_subcall_cond--d24baf6e581d442b8cb15d8905030560-b89c0226085111f0844242010aa400b5
4096
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam10_subbeam15_num_subcall_cond/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hsp_beam10_subbeam15_num_subcall_cond.json
sos
0.98242
0.97458
0.059822
0.033934
5.0000e-7
0.037585
397.38071
19000
Finished
-
jiayipan
10h 51m 42s
-
fp32
256
{"save_optimizer_state":false,"save_float_dtype":"bf16"}
512
gcs://jiayi-eu/data/v3/val_hsp.json
llama-200m-hsp-v3-beam10_subbeam15--36fd44ba3a854a66812e45dd734fcefa-b93bbc2c084b11f086f142010aa4004a
LM-Parallel/llama-hsp-v3-beam10_subbeam15
t1v-n-f953bcdb-w-2
{}
{}
paths:{
"tokenizer": "meta-llama/Llama-2-7b-hf",
"default_config_name": "200m"
}
100
true
{"online":true,"prefix":"llama-200m-hsp-v3-beam10_subbeam15","prefix_to_id":true}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam10_subbeam15/llama-200m-hsp-v3-beam10_subbeam15--36fd44ba3a854a66812e45dd734fcefa-b93bbc2c084b11f086f142010aa4004a
4096
{"bos_token_id":1,"eos_token_id":2,"pad_token_id":0,"remat_block":"nothing_saveable"}
500
19000
adamw:{
"init_lr": 5e-6,
"end_lr": 5e-7,
"lr": 5e-5,
"lr_warmup_steps": 1,
"lr_decay_steps": 19000,
"b1": 0.9,
"b2": 0.999,
"clip_gradient": 1.0,
"weight_decay": 0.01,
"bf16_momentum": false,
"multiply_by_parameter_scale": false,
"weight_decay_exclusions": [],
"schedule": "cos",
"grad_accum_steps": 1
}
gcs://jiayi-eu/lm-parallel-exp/exp-hsp-v3-beam10_subbeam15/
{}
fp32
false
false
100000000
-1,1,1
false
gcs://jiayi-eu/data/v3/train_hsp_beam10_subbeam15.json
sos
0.98195
0.97393
0.060108
0.033995
5.0000e-7
0.037856
397.59268
19000
1-20
of 90