Esteban-cosserat's workspace
Runs
1
Name
1 visualized
State
Notes
User
Tags
Created
Runtime
Sweep
_current_progress_remaining
_custom_logger
_episode_num
_last_episode_starts
_last_obs
_last_original_obs
_logger
_n_updates
_num_timesteps_at_start
_stats_window_size
_total_timesteps
_vec_normalize_env
action_noise
action_space
algo
device
ent_coef
env
env_name
ep_info_buffer
ep_success_buffer
gae_lambda
gamma
learning_rate
lr_schedule
max_grad_norm
n_envs
n_steps
normalize_advantage
num_timesteps
observation_space
policy
policy_class
policy_kwargs
policy_type
rollout_buffer
rollout_buffer_class
rollout_buffer_kwargs
sde_sample_freq
seed
start_time
tensorboard_log
total_timesteps
use_sde
Finished
-
esteban-cosserat
35m 36s
-
1
False
0
[ True]
[[-0.01132675 -0.01131711 0.0287985 0.00486089]]
None
<stable_baselines3.common.logger.Logger object at 0x7f1dab5cae90>
0
0
100
400000
None
None
Discrete(2)
A2C
cpu
0
<stable_baselines3.common.vec_env.vec_video_recorder.VecVideoRecorder object at 0x7f1dc3fe91e0>
CartPole-v1
deque([], maxlen=100)
deque([], maxlen=100)
1
0.99
0.0007
<function constant_fn.<locals>.func at 0x7f1dc3fcbd90>
0.5
1
5
False
0
Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32)
ActorCriticPolicy(
(features_extractor): FlattenExtractor(
(flatten): Flatten(start_dim=1, end_dim=-1)
)
(pi_features_extractor): FlattenExtractor(
(flatten): Flatten(start_dim=1, end_dim=-1)
)
(vf_features_extractor): FlattenExtractor(
(flatten): Flatten(start_dim=1, end_dim=-1)
)
(mlp_extractor): MlpExtractor(
(policy_net): Sequential(
(0): Linear(in_features=4, out_features=64, bias=True)
(1): Tanh()
(2): Linear(in_features=64, out_features=64, bias=True)
(3): Tanh()
)
(value_net): Sequential(
(0): Linear(in_features=4, out_features=64, bias=True)
(1): Tanh()
(2): Linear(in_features=64, out_features=64, bias=True)
(3): Tanh()
)
)
(action_net): Linear(in_features=64, out_features=2, bias=True)
(value_net): Linear(in_features=64, out_features=1, bias=True)
)
<class 'stable_baselines3.common.policies.ActorCriticPolicy'>
{'optimizer_class': <class 'torch.optim.rmsprop.RMSprop'>, 'optimizer_kwargs': {'alpha': 0.99, 'eps': 1e-05, 'weight_decay': 0}}
MlpPolicy
<stable_baselines3.common.buffers.RolloutBuffer object at 0x7f1dc3fceda0>
<class 'stable_baselines3.common.buffers.RolloutBuffer'>
{}
-1
None
1707928875364327400
wandb_data/runs/bw565nvu
400000
False
1-1
of 1