Sorry's workspace
Runs 
366
Name
1 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
distributed.gradient_accumulation_steps
distributed.gradient_clipping
distributed.mixed_precision
distributed.num_gpus
distributed.offload_optimizer_device
distributed.offload_param_device
distributed.zero_stage
method.alpha
method.awac_scale
method.beta
method.chunk_size
method.cliprange
method.cliprange_reward
method.cliprange_value
method.cql_scale
method.gamma
method.gen_kwargs.beta
method.gen_kwargs.do_sample
method.gen_kwargs.max_new_tokens
method.gen_kwargs.temperature
method.gen_kwargs.top_k
method.gen_kwargs.top_p
method.horizon
method.init_kl_coef
method.lam
method.name
method.num_rollouts
method.ppo_epochs
method.scale_reward
method.steps_for_target_q_sync
method.target
method.tau
method.two_qs
method.vf_coef
model.model_arch_type
Finished
sorry
main/bf0fe38/2023-09-01
48m 26s
-
1
1
bf16
7
none
none
2
-
-
-
16
0.2
10
0.2
-
1
-
true
128
-
0
1
10000
0.05
0.95
PPOConfig
64
4
running
-
6
-
-
1
causal
Finished
sorry
main/bf0fe38/2023-09-01
6m 31s
-
1
1
bf16
1
none
none
2
0.001
1
0
-
-
-
-
0.1
0.99
1
-
56
1
20
-
-
-
-
ilqlconfig
-
-
-
5
-
0.7
true
-
causal
Finished
sorry
main/bf0fe38/2023-09-01
2m 59s
-
1
1
bf16
1
none
none
2
-
-
-
-
-
-
-
-
-
-
true
40
-
0
1
-
-
-
sftconfig
-
-
-
-
-
-
-
-
causal
Finished
sorry
main/bf0fe38/2023-09-01
3m 10s
-
1
1
bf16
1
none
none
2
-
-
-
128
0.2
10
0.2
-
1
-
true
40
-
0
1
10000
0.001
0.95
PPOConfig
128
4
ignored
-
-
-
-
1
causal
Finished
sorry
main/bf0fe38/2023-09-01
17m 56s
-
1
1
bf16
1
none
none
2
-
-
-
12
0.2
10
0.2
-
0.99
-
true
50
-
0
1
10000
0.05
0.95
PPOConfig
128
4
-
-
6
-
-
1
seq2seq
Finished
sorry
main/bf0fe38/2023-09-01
26s
-
-
-
no
1
-
-
-
-
-
-
128
0.2
1
0.2
-
1
-
true
9
-
0
1
10000
0
0.95
PPOConfig
128
4
ignored
-
-
-
-
1.2
causal
Finished
sorry
main/bf0fe38/2023-09-01
32s
-
-
-
no
1
-
-
-
0.1
1
0
-
-
-
-
0.1
0.99
[0,1,100]
-
9
1
10
-
-
-
-
ilqlconfig
-
-
-
5
-
0.8
true
-
causal
Finished
sorry
main/900040a/2023-08-21
27s
-
-
-
no
1
-
-
-
-
-
-
128
0.2
1
0.2
-
1
-
true
9
-
0
1
10000
0
0.95
PPOConfig
128
4
ignored
-
-
-
-
1.2
causal
Finished
sorry
main/900040a/2023-08-21
39s
-
-
-
no
1
-
-
-
0.1
1
0
-
-
-
-
0.1
0.99
[0,1,100]
-
9
1
10
-
-
-
-
ilqlconfig
-
-
-
5
-
0.8
true
-
causal
Finished
sorry
main/2e667e6/2023-08-08
24s
-
-
-
no
1
-
-
-
-
-
-
128
0.2
1
0.2
-
1
-
true
9
-
0
1
10000
0
0.95
PPOConfig
128
4
ignored
-
-
-
-
1.2
causal
Finished
sorry
main/2e667e6/2023-08-08
35s
-
-
-
no
1
-
-
-
0.1
1
0
-
-
-
-
0.1
0.99
[0,1,100]
-
9
1
10
-
-
-
-
ilqlconfig
-
-
-
5
-
0.8
true
-
causal
Finished
sorry
main/e36fe9d/2023-07-24
50m 52s
-
1
1
bf16
7
none
none
2
-
-
-
16
0.2
10
0.2
-
1
-
true
128
-
0
1
10000
0.05
0.95
PPOConfig
64
4
running
-
6
-
-
1
causal
Finished
sorry
main/e36fe9d/2023-07-24
6m 28s
-
1
1
bf16
1
none
none
2
0.001
1
0
-
-
-
-
0.1
0.99
1
-
56
1
20
-
-
-
-
ilqlconfig
-
-
-
5
-
0.7
true
-
causal
Finished
sorry
main/e36fe9d/2023-07-24
3m 37s
-
1
1
bf16
1
none
none
2
-
-
-
128
0.2
10
0.2
-
1
-
true
40
-
0
1
10000
0.001
0.95
PPOConfig
128
4
ignored
-
-
-
-
1
causal
Finished
sorry
main/e36fe9d/2023-07-24
3m 22s
-
1
1
bf16
1
none
none
2
-
-
-
-
-
-
-
-
-
-
true
40
-
0
1
-
-
-
sftconfig
-
-
-
-
-
-
-
-
causal
Finished
sorry
main/e36fe9d/2023-07-24
17m 50s
-
1
1
bf16
1
none
none
2
-
-
-
12
0.2
10
0.2
-
0.99
-
true
50
-
0
1
10000
0.05
0.95
PPOConfig
128
4
-
-
6
-
-
1
seq2seq
Finished
sorry
main/e36fe9d/2023-07-24
27s
-
-
-
no
1
-
-
-
-
-
-
128
0.2
1
0.2
-
1
-
true
9
-
0
1
10000
0
0.95
PPOConfig
128
4
ignored
-
-
-
-
1.2
causal
Finished
sorry
main/e36fe9d/2023-07-24
39s
-
-
-
no
1
-
-
-
0.1
1
0
-
-
-
-
0.1
0.99
[0,1,100]
-
9
1
10
-
-
-
-
ilqlconfig
-
-
-
5
-
0.8
true
-
causal
Finished
sorry
main/ea7c2b0/2023-07-22
50m 52s
-
1
1
bf16
7
none
none
2
-
-
-
16
0.2
10
0.2
-
1
-
true
128
-
0
1
10000
0.05
0.95
PPOConfig
64
4
running
-
6
-
-
1
causal
Finished
sorry
main/ea7c2b0/2023-07-22
6m 25s
-
1
1
bf16
1
none
none
2
0.001
1
0
-
-
-
-
0.1
0.99
1
-
56
1
20
-
-
-
-
ilqlconfig
-
-
-
5
-
0.7
true
-
causal
1-20
of 159