Upup-ashton-wang's workspace
Runs
41
Name
17 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
_attn_implementation_autoset
_name_or_path
accelerator_config.even_batches
accelerator_config.non_blocking
accelerator_config.split_batches
accelerator_config.use_seedable_sampler
adafactor
adam_beta1
adam_beta2
adam_epsilon
add_cross_attention
architectures
attention_bias
attention_dropout
auto_find_batch_size
average_tokens_across_devices
batch_eval_metrics
beta
bf16
bf16_full_eval
bos_token_id
chars_per_token
chunk_size_feed_forward
dataloader_drop_last
dataloader_num_workers
dataloader_persistent_workers
dataloader_pin_memory
dataset_text_field
ddp_timeout
debug
disable_tqdm
diversity_penalty
do_eval
do_predict
do_sample
do_train
ds3_gather_for_generation
early_stopping
encoder_no_repeat_ngram_size
eos_token_id
epsilon
eval_delay
eval_do_concat_batches
eval_on_start
Crashed
-
upup-ashton-wang
8h 47m 45s
-
true
/project/neiswang_1391/shangsha/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
1d 21h 38m 20s
-
true
/project/neiswang_1391/shangsha/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
1d 8h 52m 6s
-
true
/project/neiswang_1391/shangsha/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
8h 26m 1s
-
true
/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
9h 54m 4s
-
true
/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
1d 17h 15s
-
true
/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
14d 5h 8m 27s
-
true
/project/neiswang_1391/shangsha/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
8h 13m 1s
-
true
/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
1d 19h 1m 39s
-
true
/project/neiswang_1391/shangsha/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
16d 16h 4m 21s
-
true
/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
8h 28m
-
true
/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
8h 26m 16s
-
true
/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
7d 6h 29m 10s
-
true
["/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base","/project/neiswang_1391/shangsha/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base"]
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
3d 16h 39m 11s
-
true
["/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base","/project/neiswang_1391/shangsha/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base"]
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
7d 2h 7m 33s
-
true
["/home/omer/shangshang/project/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base","/project/neiswang_1391/shangsha/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base"]
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Finished
-
upup-ashton-wang
12h 21m 40s
-
true
/project/neiswang_1391/shangsha/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
1d 7h 53m 45s
-
true
/project/neiswang_1391/shangsha/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
Crashed
-
upup-ashton-wang
14d 11h 35m 32s
-
true
/project/neiswang_1391/shangsha/reasoning/reasoning-sae/ckpts/models/DeepSeek-R1-Distill-Qwen-1.5B/base
true
false
false
true
false
0.9
0.999
1.0000e-8
false
Qwen2ForCausalLM
-
0
false
false
false
0.04
true
false
151643
-
0
false
0
false
true
-
1800
-
false
0
false
false
false
false
true
false
0
151643
-
0
true
false
1-18
of 18