Comment
Run set
52
State
Notes
User
Tags
Created
Runtime
Sweep
DQN.gamma
DQN.learning_rate
DQN.multi_step_learning
DQN.optimize_interval
DQN.replay_initial
DQN.replay_size
DQN.target_update_interval
DQN.training_batch_size
PPO.entropy_clip
PPO.entropy_reg
PPO.epochs_per_batch
PPO.eps_policy
PPO.eps_value
PPO.gamma
PPO.learning_rate
PPO.lmda
PPO.num_minibatches
PPO.steps_per_env
PPO.vf_coef
SafeLifePolicyNetwork.dense_depth
SafeLifePolicyNetwork.dense_width
algo
data_dir
deterministic
env.exit_difficulty.t
env.exit_difficulty.y
env.task_switch.t
env.task_switch.y
env.view_size
env_type
human_play
run_type
seed
side_effect.baseline
side_effect.penalty
side_effect.schedule.t
side_effect.schedule.y
steps
validation.env_seed
validation.num_levels
avg_length
benchmark/episodes
benchmark/length
benchmark/length_avg
Finished
stacey
5h 6m 52s
-
-
-
-
-
-
-
-
-
1
0.01
3
0.2
0.2
0.97
0.0003
0.95
4
20
0.5
1
512
ppo
append_2K_save_vid
false
[500000,2000000]
[0.001,1]
[100000,1500000]
[0.1,1]
25
append-spawn
-
train
4911098331208507
starting-state
0
[1000000,2000000]
[0,1]
2000000
732230218323780600
5
50
1000
50
50
Finished
stacey
14s
-
-
-
-
-
-
-
-
-
1
0.01
3
0.2
0.2
0.97
0.0003
0.95
4
20
0.5
1
512
ppo
tmp
false
[500000,2000000]
[0.001,1]
[100000,1500000]
[0.1,1]
25
append-spawn
-
train
8813835047844002
starting-state
0
[1000000,2000000]
[0,1]
300
732230218323780600
5
50
14
50
-
Finished
stacey
6s
-
-
-
-
-
-
-
-
-
1
0.01
3
0.2
0.2
0.97
0.0003
0.95
4
20
0.5
1
512
ppo
tmp
false
[500000,2000000]
[0.001,1]
[100000,1500000]
[0.1,1]
25
append-spawn
-
train
8025084050462675
starting-state
0
[1000000,2000000]
[0,1]
300
732230218323780600
5
-
-
-
-
Finished
stacey
3s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
ppo
tmp
false
-
-
-
-
-
append-spawn
-
train
-
-
-
-
-
20000
-
-
-
-
-
-
Finished
stacey
2s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
ppo
tmp
false
-
-
-
-
-
append-spawn
-
train
-
-
-
-
-
1000
-
-
-
-
-
-
Finished
stacey
3s
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
ppo
tmp
false
-
-
-
-
-
append-spawn
-
train
-
-
-
-
-
100
-
-
-
-
-
-
Finished
stacey
3m 5s
-
-
-
-
-
-
-
-
-
1
0.01
3
0.05
0.4
0.97
0.0003
0.95
4
20
0.5
1
512
ppo
tmp
false
[500000,2000000]
[0.001,1]
[100000,1500000]
[0.1,1]
25
append-spawn
-
train
573099493367412
starting-state
0
[1000000,2000000]
[0,1]
20000
732230218323780600
5
-
-
-
-
Finished
stacey
19s
-
-
-
-
-
-
-
-
-
1
0.01
3
0.05
0.4
0.97
0.0003
0.95
4
20
0.5
1
512
ppo
tmp
false
[500000,2000000]
[0.001,1]
[100000,1500000]
[0.1,1]
25
append-spawn
-
train
6154426503513261
starting-state
0
[1000000,2000000]
[0,1]
20000
732230218323780600
5
-
-
-
-
Finished
stacey
14s
-
-
-
-
-
-
-
-
-
1
0.01
3
0.05
0.4
0.97
0.0003
0.95
4
20
0.5
1
512
ppo
tmp_vid
false
[500000,2000000]
[0.001,1]
[100000,1500000]
[0.1,1]
25
append-spawn
-
train
1397601922159533
starting-state
0
[1000000,2000000]
[0,1]
1000
732230218323780600
5
-
-
-
-
Finished
stacey
22s
-
-
-
-
-
-
-
-
-
1
0.01
3
0.05
0.4
0.97
0.0003
0.95
4
20
0.5
1
512
ppo
tmp_vid
false
[500000,2000000]
[0.001,1]
[100000,1500000]
[0.1,1]
25
append-spawn
-
train
1971423231674092
starting-state
0
[1000000,2000000]
[0,1]
6000000
732230218323780600
5
-
-
-
-
1-10
of 52
Add a comment
Created with ❤️ on Weights & Biases.
https://wandb.ai/stacey/saferlife/reports/Latest-Benchmarks-1-26--VmlldzozNDI0NDc0