Zhangshengdong's workspace
Runs
13
Name
9 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
_attn_implementation_autoset
_n_gpu
_name_or_path
accelerator_config.even_batches
accelerator_config.non_blocking
accelerator_config.split_batches
accelerator_config.use_configured_state
accelerator_config.use_seedable_sampler
adafactor
adam_beta1
adam_beta2
adam_epsilon
add_cross_attention
architectures
attention_dropout
attn_implementation
auto_find_batch_size
average_tokens_across_devices
batch_eval_metrics
benchmarks
beta
bf16
bf16_full_eval
bnb_4bit_quant_type
bos_token_id
callbacks
chunk_size_feed_forward
code_language
cosine_max_len
cosine_max_value_correct
cosine_max_value_wrong
cosine_min_value_correct
cosine_min_value_wrong
dataloader_drop_last
dataloader_num_workers
dataloader_persistent_workers
dataloader_pin_memory
dataset_name
dataset_test_split
dataset_train_split
ddp_timeout
debug
disable_tqdm
diversity_penalty
Finished
-
zhangshengdong
2d 6h 49m 25s
-
true
1
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B-Instruct
true
false
false
false
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
flash_attention_2
false
false
false
[]
0.04
true
false
nf4
151643
[]
0
python
1000
1
-0.5
0.5
-1
false
0
false
true
/mnt/nfs/zsd_server/codes/open-r1/data/chinese-sft-stem-zh-hans/filter
test
train
1800
[]
false
0
Killed
-
zhangshengdong
2h 26m 22s
-
true
1
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B
true
false
false
false
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
flash_attention_2
false
false
false
[]
0.04
true
false
nf4
151643
[]
0
python
1000
1
-0.5
0.5
-1
false
0
false
true
/mnt/nfs/zsd_server/codes/open-r1/data/chinese-sft-stem-zh-hans/filter
test
train
1800
[]
false
0
Finished
-
zhangshengdong
1d 17h 34m 31s
-
true
1
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B-Instruct
true
false
false
false
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
flash_attention_2
false
false
false
[]
0.04
true
false
nf4
151643
[]
0
python
1000
1
-0.5
0.5
-1
false
0
false
true
/mnt/nfs/zsd_server/codes/open-r1/data/Chinese-DeepSeek-R1-Distill-data-110k_filter
test
train
1800
[]
false
0
Killed
-
zhangshengdong
20h 58m 5s
-
true
1
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B-Instruct
true
false
false
false
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
flash_attention_2
false
false
false
[]
0.04
true
false
nf4
151643
[]
0
python
1000
1
-0.5
0.5
-1
false
0
false
true
/mnt/nfs/zsd_server/codes/open-r1/data/Chinese-DeepSeek-R1-Distill-data-110k_filter
test
train
1800
[]
false
0
Finished
-
zhangshengdong
19h 5m 55s
-
true
1
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B-Instruct
true
false
false
false
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
flash_attention_2
false
false
false
[]
0.04
true
false
nf4
151643
[]
0
python
1000
1
-0.5
0.5
-1
false
0
false
true
/mnt/nfs/zsd_server/codes/open-r1/data/Chinese-DeepSeek-R1-Distill-data-110k_filter
test
train
1800
[]
false
0
Killed
-
zhangshengdong
1d 1h 56m 6s
-
true
1
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B-Instruct
true
false
false
false
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
flash_attention_2
false
false
false
[]
0.04
true
false
nf4
151643
[]
0
python
1000
1
-0.5
0.5
-1
false
0
false
true
data/NuminaMath-TIR/data
test
train
1800
[]
false
0
Finished
-
zhangshengdong
2d 15h 33m 36s
-
true
1
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B
true
false
false
false
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
flash_attention_2
false
false
false
[]
0.04
true
false
nf4
151643
[]
0
-
1000
1
-0.5
0.5
-1
false
0
false
true
data/NuminaMath-TIR/data
test
train
1800
[]
false
0
Finished
-
zhangshengdong
1d 14h 48m 22s
-
true
1
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B
true
false
false
false
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
flash_attention_2
false
false
false
[]
0.04
true
false
nf4
151643
[]
0
-
1000
1
-0.5
0.5
-1
false
0
false
true
data/NuminaMath-TIR/data
test
train
1800
[]
false
0
Killed
-
zhangshengdong
2d 12h 27m 44s
-
true
1
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B
true
false
false
false
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
flash_attention_2
false
false
false
[]
0.04
true
false
nf4
151643
[]
0
-
1000
1
-0.5
0.5
-1
false
0
false
true
data/NuminaMath-TIR/data
test
train
1800
[]
false
0
Killed
-
zhangshengdong
5h 40m 56s
-
true
1
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B
true
false
false
false
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
flash_attention_2
false
false
false
[]
0.04
true
false
nf4
151643
[]
0
-
1000
1
-0.5
0.5
-1
false
0
false
true
data/NuminaMath-TIR/data
test
train
1800
[]
false
0
Killed
-
zhangshengdong
17h 6m 7s
-
true
-
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B
true
false
false
-
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
-
false
false
false
[]
0.04
true
false
-
151643
[]
0
-
-
-
-
-
-
false
0
false
true
-
-
-
1800
[]
false
0
Finished
-
zhangshengdong
4h 1m 7s
-
true
-
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B
true
false
false
-
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
-
false
false
false
[]
0.04
true
false
-
151643
[]
0
-
-
-
-
-
-
false
0
false
true
-
-
-
1800
[]
false
0
Killed
-
zhangshengdong
1h 47m 30s
-
true
-
/mnt/nfs/zsd_server/models/huggingface/Qwen2.5-7B
true
false
false
-
true
false
0.9
0.999
1.0000e-8
false
["Qwen2ForCausalLM"]
0
-
false
false
false
[]
0.04
true
false
-
151643
[]
0
-
-
-
-
-
-
false
0
false
true
-
-
-
1800
[]
false
0
1-13
of 13