Skip to main content

PPO and PPG procgen report

Created on February 5|Last edited on February 5

5M10M15M20Mglobal_step020406080
Run set
1
Run set 2
3
Name
3 visualized
2
6
2
6
2
6
1
3
1
3
1
6
4
12
4
12
4
12
3
9
State
Notes
User
Tags
Created
Runtime
Sweep
alg
anneal_lr
aux_batch_size
aux_minibatch_size
batch_size
beta_clone
capture_video
clip_coef
clip_vloss
cuda
e_auxiliary
e_policy
ent_coef
env
exp_name
gae
gae_lambda
gamma
gym_id
learning_rate
max_grad_norm
minibatch_size
n_aux_grad_accum
n_aux_minibatch
n_iteration
network
norm_adv
num_env
num_envs
num_minibatches
num_steps
num_timesteps
play
reward_scale
save_video_interval
save_video_length
seed
torch_deterministic
total_timesteps
track
update_epochs
v_value
vf_coef
wandb_entity
Finished
-
costa-huang
2d 9h 12m 6s
-
ppo2
false
-
-
16384
-
false
0.2
true
true
-
-
0.01
bigfish
["baselines-ppo2-None","ppo_procgen"]
true
0.95
0.999
bigfish
0.0005
0.5
2048
-
-
-
-
true
64
64
8
256
25000000
false
1
0
200
2
true
25000000
true
3
-
0.5
vwxyzjn
Finished
-
costa-huang
2d 8h 33m 4s
-
ppo2
false
-
-
16384
-
false
0.2
true
true
-
-
0.01
bossfight
["baselines-ppo2-None","ppo_procgen"]
true
0.95
0.999
bossfight
0.0005
0.5
2048
-
-
-
-
true
64
64
8
256
25000000
false
1
0
200
2
true
25000000
true
3
-
0.5
vwxyzjn
Finished
-
costa-huang
2d 6h 49m 32s
-
ppo2
false
-
-
16384
-
false
0.2
true
true
-
-
0.01
starpilot
["baselines-ppo2-None","ppo_procgen"]
true
0.95
0.999
starpilot
0.0005
0.5
2048
-
-
-
-
true
64
64
8
256
25000000
false
1
0
200
2
true
25000000
true
3
-
0.5
vwxyzjn
Finished
-
costa-huang
4h 4m 13s
-
-
true
-
-
1024
-
false
0.1
true
true
-
-
0.01
-
ppo_atari_envpool
true
0.95
0.99
Breakout-v5
0.00025
0.5
256
-
-
-
-
true
-
8
4
128
-
-
-
-
-
2
true
10000000
true
4
-
0.5
vwxyzjn
Finished
-
costa-huang
4h 1m 33s
-
-
true
-
-
1024
-
false
0.1
true
true
-
-
0.01
-
ppo_atari_envpool
true
0.95
0.99
BeamRider-v5
0.00025
0.5
256
-
-
-
-
true
-
8
4
128
-
-
-
-
-
2
true
10000000
true
4
-
0.5
vwxyzjn
Finished
-
costa-huang
3d 6h 48m 17s
-
-
true
-
-
1536
-
false
0.15
true
true
-
-
0.01
-
ppo_atari_envpool
true
0.95
0.99
Pong-v5
0.00025
0.5
256
-
-
-
-
true
-
12
6
128
-
-
-
-
-
2
true
10000000
true
3.5
-
0.5
vwxyzjn
Finished
-
costa-huang
24m 8s
-
ppo2
true
-
-
512
-
false
0.2
true
false
-
-
0.01
MountainCar-v0
["baselines-ppo2-mlp","baselines-ppo2-mlp-seperate-networks","ppo","ppo_shared"]
true
0.95
0.99
MountainCar-v0
0.00025
0.5
128
-
-
-
mlp
true
-
4
4
128
500000
false
1
0
200
2
true
500000
true
4
-
0.5
vwxyzjn
Finished
-
costa-huang
36m 48s
-
ppo2
true
-
-
512
-
false
0.2
true
false
-
-
0.01
Acrobot-v1
["baselines-ppo2-mlp","baselines-ppo2-mlp-seperate-networks","ppo","ppo_shared"]
true
0.95
0.99
Acrobot-v1
0.00025
0.5
128
-
-
-
mlp
true
-
4
4
128
500000
false
1
0
200
2
true
500000
true
4
-
0.5
vwxyzjn
Finished
-
costa-huang
11d 18h 42m 33s
-
ppo2
true
-
-
512
-
false
0.2
true
false
-
-
0.01
CartPole-v1
["baselines-ppo2-mlp","baselines-ppo2-mlp-seperate-networks","ppo","ppo_shared"]
true
0.95
0.99
CartPole-v1
0.00025
0.5
128
-
-
-
mlp
true
4
4
4
128
500000
false
1
0
200
2
true
500000
true
4
-
0.5
vwxyzjn
Finished
-
costa-huang
19d 18h 20s
-
ppo2
true
-
-
1024
-
false
0.1
true
true
-
-
0.01
MicrortsRandomEnemyShapedReward3-v1
["baselines-ppo2-cnn_gym_microrts","ppo_multidiscrete","ppo_multidiscrete_mask"]
true
0.95
0.99
MicrortsRandomEnemyShapedReward3-v1
0.00025
0.5
256
-
-
-
cnn_gym_microrts
true
8
8
4
128
2000000
false
1
0
200
2
true
2000000
true
4
-
0.5
vwxyzjn
1-10
of 19