Flegrand31's workspace (2 runs)

Both runs are finished A2C trainings on PandaReachJointsDense-v3; each listing below pairs the run's W&B metadata with the model attributes logged to the run config.
Run 1

W&B metadata
  State: Finished
  Notes: -
  User: flegrand31
  Tags:
  Created:
  Runtime: 9m 22s
  Sweep: -

Logged model attributes
  _current_progress_remaining: 1
  _custom_logger: False
  _episode_num: 0
  _last_episode_starts: [ True]
  _last_obs: OrderedDict([('achieved_goal', array([[ 3.8439669e-02, -2.1943560e-12, 1.9740014e-01]], dtype=float32)), ('desired_goal', array([[-0.13347386, -0.08443704, 0.15813869]], dtype=float32)), ('observation', array([[ 3.8439669e-02, -2.1943560e-12, 1.9740014e-01, 0.0000000e+00, -0.0000000e+00, 0.0000000e+00]], dtype=float32))])
  _last_original_obs: None
  _logger: <stable_baselines3.common.logger.Logger object at 0x15f010290>
  _n_updates: 0
  _num_timesteps_at_start: 0
  _stats_window_size: 100
  _total_timesteps: 500000
  _vec_normalize_env: None
  action_noise: None
  action_space: Box(-1.0, 1.0, (7,), float32)
  algo: A2C
  device: cpu
  ent_coef: 0
  env: <stable_baselines3.common.vec_env.vec_video_recorder.VecVideoRecorder object at 0x16bfd5b90>
  env_name: PandaReachJointsDense-v3
  ep_info_buffer: deque([], maxlen=100)
  ep_success_buffer: deque([], maxlen=100)
  gae_lambda: 1
  gamma: 0.99
  learning_rate: 0.0007
  lr_schedule: <function constant_fn.<locals>.func at 0x16b872fc0>
  max_grad_norm: 0.5
  n_envs: 1
  n_steps: 5
  normalize_advantage: False
  num_timesteps: 0
  observation_space: Dict('achieved_goal': Box(-10.0, 10.0, (3,), float32), 'desired_goal': Box(-10.0, 10.0, (3,), float32), 'observation': Box(-10.0, 10.0, (6,), float32))
  policy:
    MultiInputActorCriticPolicy(
      (features_extractor): CombinedExtractor(
        (extractors): ModuleDict(
          (achieved_goal): Flatten(start_dim=1, end_dim=-1)
          (desired_goal): Flatten(start_dim=1, end_dim=-1)
          (observation): Flatten(start_dim=1, end_dim=-1)
        )
      )
      (pi_features_extractor): CombinedExtractor(
        (extractors): ModuleDict(
          (achieved_goal): Flatten(start_dim=1, end_dim=-1)
          (desired_goal): Flatten(start_dim=1, end_dim=-1)
          (observation): Flatten(start_dim=1, end_dim=-1)
        )
      )
      (vf_features_extractor): CombinedExtractor(
        (extractors): ModuleDict(
          (achieved_goal): Flatten(start_dim=1, end_dim=-1)
          (desired_goal): Flatten(start_dim=1, end_dim=-1)
          (observation): Flatten(start_dim=1, end_dim=-1)
        )
      )
      (mlp_extractor): MlpExtractor(
        (policy_net): Sequential(
          (0): Linear(in_features=12, out_features=64, bias=True)
          (1): Tanh()
          (2): Linear(in_features=64, out_features=64, bias=True)
          (3): Tanh()
        )
        (value_net): Sequential(
          (0): Linear(in_features=12, out_features=64, bias=True)
          (1): Tanh()
          (2): Linear(in_features=64, out_features=64, bias=True)
          (3): Tanh()
        )
      )
      (action_net): Linear(in_features=64, out_features=7, bias=True)
      (value_net): Linear(in_features=64, out_features=1, bias=True)
    )
  policy_class: <class 'stable_baselines3.common.policies.MultiInputActorCriticPolicy'>
  policy_kwargs: {'optimizer_class': <class 'torch.optim.rmsprop.RMSprop'>, 'optimizer_kwargs': {'alpha': 0.99, 'eps': 1e-05, 'weight_decay': 0}}
  policy_type: MultiInputPolicy
  rollout_buffer: <stable_baselines3.common.buffers.DictRolloutBuffer object at 0x16bf699d0>
  rollout_buffer_class: <class 'stable_baselines3.common.buffers.DictRolloutBuffer'>
  rollout_buffer_kwargs: {}
  sde_sample_freq: -1
  seed: None
  start_time: 1707834242162989000
  tensorboard_log: runs/j6p5hifl
  total_timesteps: 500000
  use_sde: False
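The policy printout above is the default network that Stable-Baselines3 builds for a Dict observation space: CombinedExtractor flattens and concatenates achieved_goal (3), desired_goal (3) and observation (6) into 12 features, MlpExtractor applies two 64-unit Tanh layers to both the actor and the critic branch, and the heads map to the 7-dimensional action space and a scalar value. A minimal sketch that reproduces this printout, assuming panda-gym is installed (the script is not taken from these runs):

```python
# Minimal sketch (not the runs' actual code): rebuild the default A2C policy
# that produced the architecture printed above.
import gymnasium as gym
import panda_gym  # noqa: F401  -- importing registers PandaReachJointsDense-v3
from stable_baselines3 import A2C

env = gym.make("PandaReachJointsDense-v3")
model = A2C("MultiInputPolicy", env)

# CombinedExtractor flattens the three Dict entries (3 + 3 + 6 = 12 features),
# MlpExtractor adds two 64-unit Tanh layers per branch, and the heads output a
# 7-dim action mean and a scalar value estimate.
print(model.policy)
```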
Run 2

W&B metadata
  State: Finished
  Notes: -
  User: flegrand31
  Tags:
  Created:
  Runtime: 10m 16s
  Sweep: -

Logged model attributes
  _current_progress_remaining: 1
  _custom_logger: False
  _episode_num: 0
  _last_episode_starts: [ True]
  _last_obs: OrderedDict([('achieved_goal', array([[ 3.8439669e-02, -2.1943560e-12, 1.9740014e-01]], dtype=float32)), ('desired_goal', array([[0.12024609, 0.07169523, 0.18676694]], dtype=float32)), ('observation', array([[ 3.8439669e-02, -2.1943560e-12, 1.9740014e-01, 0.0000000e+00, -0.0000000e+00, 0.0000000e+00]], dtype=float32))])
  _last_original_obs: None
  _logger: <stable_baselines3.common.logger.Logger object at 0x2aba42bd0>
  _n_updates: 0
  _num_timesteps_at_start: 0
  _stats_window_size: 100
  _total_timesteps: 500000
  _vec_normalize_env: None
  action_noise: None
  action_space: Box(-1.0, 1.0, (7,), float32)
  algo: A2C
  device: cpu
  ent_coef: 0
  env: <stable_baselines3.common.vec_env.vec_video_recorder.VecVideoRecorder object at 0x15e49aad0>
  env_name: PandaReachJointsDense-v3
  ep_info_buffer: deque([], maxlen=100)
  ep_success_buffer: deque([], maxlen=100)
  gae_lambda: 1
  gamma: 0.99
  learning_rate: 0.0007
  lr_schedule: <function constant_fn.<locals>.func at 0x15e472fc0>
  max_grad_norm: 0.5
  n_envs: 1
  n_steps: 5
  normalize_advantage: False
  num_timesteps: 0
  observation_space: Dict('achieved_goal': Box(-10.0, 10.0, (3,), float32), 'desired_goal': Box(-10.0, 10.0, (3,), float32), 'observation': Box(-10.0, 10.0, (6,), float32))
  policy: MultiInputActorCriticPolicy (architecture identical to Run 1, shown above)
  policy_class: <class 'stable_baselines3.common.policies.MultiInputActorCriticPolicy'>
  policy_kwargs: {'optimizer_class': <class 'torch.optim.rmsprop.RMSprop'>, 'optimizer_kwargs': {'alpha': 0.99, 'eps': 1e-05, 'weight_decay': 0}}
  policy_type: MultiInputPolicy
  rollout_buffer: <stable_baselines3.common.buffers.DictRolloutBuffer object at 0x169bbb1d0>
  rollout_buffer_class: <class 'stable_baselines3.common.buffers.DictRolloutBuffer'>
  rollout_buffer_kwargs: {}
  sde_sample_freq: -1
  seed: None
  start_time: 1707342720309639000
  tensorboard_log: runs/tbju7d0l
  total_timesteps: 500000
  use_sde: False
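The remaining attributes match Stable-Baselines3's A2C defaults (learning_rate=0.0007 with a constant schedule, n_steps=5, gamma=0.99, gae_lambda=1.0, ent_coef=0, max_grad_norm=0.5, RMSprop with eps=1e-05, use_sde=False), run for 500000 timesteps in a single VecVideoRecorder-wrapped environment with TensorBoard logs under runs/<run id>. A hedged sketch of how such a run could be launched and tracked; the W&B project name, video folder and recording trigger are assumptions, not values from these runs:

```python
# Hedged reconstruction of the training setup implied by the attributes above.
# The W&B project name, video folder and recording trigger are assumptions.
import gymnasium as gym
import panda_gym  # noqa: F401  -- importing registers PandaReachJointsDense-v3
import wandb
from wandb.integration.sb3 import WandbCallback
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder

run = wandb.init(project="panda-reach-a2c", sync_tensorboard=True)  # assumed project name

env = DummyVecEnv([lambda: gym.make("PandaReachJointsDense-v3", render_mode="rgb_array")])
env = VecVideoRecorder(
    env,
    f"videos/{run.id}",                                      # assumed output folder
    record_video_trigger=lambda step: step % 100_000 == 0,   # assumed trigger
    video_length=200,
)

model = A2C(
    "MultiInputPolicy",
    env,
    learning_rate=7e-4,   # logged as 0.0007, constant schedule
    n_steps=5,
    gamma=0.99,
    gae_lambda=1.0,
    ent_coef=0.0,
    max_grad_norm=0.5,
    use_sde=False,
    tensorboard_log=f"runs/{run.id}",  # matches the logged runs/<id> pattern
    verbose=1,
)
model.learn(total_timesteps=500_000, callback=WandbCallback())
run.finish()
```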