Byyoung3's workspace
Runs
254
Name
3 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
_attn_implementation_autoset
_name_or_path
accelerator_config.even_batches
accelerator_config.non_blocking
accelerator_config.split_batches
accelerator_config.use_seedable_sampler
activation_function
activation_type
adafactor
adam_beta1
adam_beta2
adam_epsilon
add_cross_attention
alibi
alibi_bias_max
architecture.block_cls
architecture.mlp.mlp_cls
architectures
attention_bias
attention_dropout
attention_layer_norm
attention_layer_norm_with_affine
attn_pdrop
auto_find_batch_size
auto_map.AutoConfig
auto_map.AutoModelForCausalLM
auto_map.AutoTokenizer
average_tokens_across_devices
batch_eval_metrics
batch_size
beta
bf16
bf16_full_eval
bias_for_layer_norm
bits
block_group_size
block_type
bos_token_id
cache_dir
chars_per_token
chunk_size_feed_forward
classifier_dropout
cliprange
cliprange_value
Finished
-
byyoung3
1h 4m 56s
-
-
cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["GPTNeoXForCausalLM"]
true
0
-
-
-
false
-
-
-
false
false
64
-
false
false
-
-
-
-
0
-
-
0
0.1
0.2
0.2
Crashed
-
byyoung3
30s
-
-
cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["GPTNeoXForCausalLM"]
true
0
-
-
-
false
-
-
-
false
false
64
-
false
false
-
-
-
-
0
-
-
0
0.1
0.2
0.2
Crashed
-
byyoung3
45s
-
-
cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["GPTNeoXForCausalLM"]
true
0
-
-
-
false
-
-
-
false
false
64
-
false
false
-
-
-
-
0
-
-
0
0.1
0.2
0.2
Crashed
-
byyoung3
0s
-
-
cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["GPTNeoXForCausalLM"]
true
0
-
-
-
false
-
-
-
false
false
64
-
false
false
-
-
-
-
0
-
-
0
0.1
0.2
0.2
Crashed
-
byyoung3
15s
-
-
cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["GPTNeoXForCausalLM"]
true
0
-
-
-
false
-
-
-
false
false
64
-
false
false
-
-
-
-
0
-
-
0
0.1
0.2
0.2
Killed
-
byyoung3
22m 15s
-
-
cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["GPTNeoXForCausalLM"]
true
0
-
-
-
false
-
-
-
false
false
64
-
false
false
-
-
-
-
0
-
-
0
0.1
0.2
0.2
Crashed
-
byyoung3
17m 16s
-
-
cleanrl/EleutherAI_pythia-1b-deduped__sft__tldr
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["GPTNeoXForCausalLM"]
true
0
-
-
-
false
-
-
-
false
false
64
-
true
false
-
-
-
-
0
-
-
0
0.1
0.2
0.2
Crashed
-
byyoung3
6m 31s
-
-
EleutherAI/pythia-1b-deduped
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["GPTNeoXForCausalLM"]
true
0
-
-
-
false
-
-
-
false
false
256
-
true
false
-
-
-
-
0
-
-
0
0.1
0.2
0.2
Crashed
-
byyoung3
1h 18m 58s
-
-
Qwen/Qwen3-0.6B
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen3ForCausalLM"]
false
0
-
-
-
false
-
-
-
false
false
256
-
true
false
-
-
-
-
151643
-
-
0
-
0.2
0.2
Crashed
-
byyoung3
30s
-
-
Qwen/Qwen3-0.6B
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen3ForCausalLM"]
false
0
-
-
-
false
-
-
-
false
false
-
-
true
false
-
-
-
-
151643
-
-
0
-
-
-
Crashed
-
byyoung3
30s
-
-
Qwen/Qwen3-0.6B
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen3ForCausalLM"]
false
0
-
-
-
false
-
-
-
false
false
-
-
true
false
-
-
-
-
151643
-
-
0
-
-
-
Crashed
-
byyoung3
2m 31s
-
-
Qwen/Qwen2-0.5B-Instruct
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen2ForCausalLM"]
-
0
-
-
-
false
-
-
-
false
false
-
0.1
false
false
-
-
-
-
151643
-
-
0
-
-
-
Crashed
-
byyoung3
4m 16s
-
-
Qwen/Qwen2-0.5B-Instruct
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen2ForCausalLM"]
-
0
-
-
-
false
-
-
-
false
false
-
0.1
false
false
-
-
-
-
151643
-
-
0
-
-
-
Finished
-
byyoung3
14m 53s
-
-
Qwen/Qwen2-0.5B-Instruct
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen2ForCausalLM"]
-
0
-
-
-
false
-
-
-
false
false
-
0.1
false
false
-
-
-
-
151643
-
-
0
-
-
-
Finished
-
byyoung3
2m 59s
-
-
Qwen/Qwen2-0.5B-Instruct
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen2ForCausalLM"]
-
0
-
-
-
false
-
-
-
false
false
-
0.1
false
false
-
-
-
-
151643
-
-
0
-
-
-
Failed
-
byyoung3
5s
-
-
Qwen/Qwen2-0.5B-Instruct
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen2ForCausalLM"]
-
0
-
-
-
false
-
-
-
false
false
-
0.1
false
false
-
-
-
-
151643
-
-
0
-
-
-
Failed
-
byyoung3
7s
-
-
Qwen/Qwen2-0.5B-Instruct
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen2ForCausalLM"]
-
0
-
-
-
false
-
-
-
false
false
-
0.1
false
false
-
-
-
-
151643
-
-
0
-
-
-
Crashed
-
byyoung3
7m 46s
-
-
Qwen/Qwen2-0.5B-Instruct
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen2ForCausalLM"]
-
0
-
-
-
false
-
-
-
false
false
-
0.1
false
false
-
-
-
-
151643
-
-
0
-
-
-
Crashed
-
byyoung3
1m 46s
-
-
Qwen/Qwen2-0.5B-Instruct
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen2ForCausalLM"]
-
0
-
-
-
false
-
-
-
false
false
-
0.1
false
false
-
-
-
-
151643
-
-
0
-
-
-
Crashed
-
byyoung3
1m 1s
-
-
Qwen/Qwen2-0.5B-Instruct
true
false
false
true
-
-
false
0.9
0.999
1.0000e-8
false
-
-
-
-
["Qwen2ForCausalLM"]
-
0
-
-
-
false
-
-
-
false
false
-
0.1
false
false
-
-
-
-
151643
-
-
0
-
-
-
1-20
of 254