Skip to main content

TD3+BC, D4RL: Evaluation for Best Hyperparameters

Tables 2, 3, 4 in the paper
Created on May 15 | Last edited on May 15

HalfCheetah


x-axis (Step): 200, 400, 600, 800, 1k, 1.2k; y-axis: 0, 20, 40, 60, 80
dataset_name: halfcheetah-full-replay-v2, actor_bc_coef: 0.01, critic_bc_coef: 0
dataset_name: halfcheetah-expert-v2, actor_bc_coef: 0.4, critic_bc_coef: 0
dataset_name: halfcheetah-random-v2, actor_bc_coef: 0.001, critic_bc_coef: 0
dataset_name: halfcheetah-medium-replay-v2, actor_bc_coef: 0.05, critic_bc_coef: 0
dataset_name: halfcheetah-medium-expert-v2, actor_bc_coef: 0.1, critic_bc_coef: 0
dataset_name: halfcheetah-medium-v2, actor_bc_coef: 0.01, critic_bc_coef: 0
Run set
8358
Name
60 visualized
1
10
1
10
1
10
1
10
1
10
1
10
State
Notes
User
Tags
Created
Runtime
Sweep
actor_bc_coef
actor_learning_rate
actor_ln
batch_size
bc_coef_mul
config_path
critic_bc_coef
critic_learning_rate
critic_ln
dataset_name
eval_episodes
eval_every
eval_seed
gamma
group
hidden_dim
mlc_job_name
name
noise_clip
normalize_q
normalize_reward
normalize_states
num_epochs
num_updates_on_epoch
policy_freq
policy_noise
project
tau
train_seed
actor_n_hiddens
bc_coef
buffer_size
critic_n_hiddens
device
discount
encoder_learning_rate
eval_freq
expl_noise
load_model
max_timesteps
min_decay_coef
mixing_ratio
n_episodes
normalize
Finished
adagrad
36m 7s
-
0.01
0.0003
false
256
-
configs/sac-doup/halfcheetah/halfcheetah_full_replay.yaml
0
0.0003
false
halfcheetah-full-replay-v2
10
5
42
0.99
sac-doup-halfcheetah-full-replay-v2-sweep-v0
256
["selectel-a100-1x-TD3-BC-1202w7","selectel-a100-1x-TD3-BC-3jjt3u","selectel-a100-1x-TD3-BC-6ynrx2","selectel-a100-1x-TD3-BC-77h3sn","selectel-a100-1x-TD3-BC-aamyqj","selectel-a100-1x-TD3-BC-bkhgfz","selectel-a100-1x-TD3-BC-kgakg3","selectel-a100-1x-TD3-BC-sz43es"]
["sac-doup-halfcheetah-full-replay-v2-06e2945f","sac-doup-halfcheetah-full-replay-v2-0c7f25ba","sac-doup-halfcheetah-full-replay-v2-5e8d6d3e","sac-doup-halfcheetah-full-replay-v2-6c8ed3cb","sac-doup-halfcheetah-full-replay-v2-78b15751","sac-doup-halfcheetah-full-replay-v2-eaee15d6","sac-doup-halfcheetah-full-replay-v2-ef885158","sac-doup-halfcheetah-full-replay-v2-f75536ec","sac-doup-halfcheetah-full-replay-v2-fd069278","sac-doup-halfcheetah-full-replay-v2-fd83700a"]
0.5
true
false
false
1000
1000
2
0.2
DOUP
0.005
14.5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
adagrad
35m 37s
-
0.4
0.0003
false
256
-
configs/sac-doup/halfcheetah/halfcheetah_expert.yaml
0
0.0003
false
halfcheetah-expert-v2
10
5
42
0.99
sac-doup-halfcheetah-expert-v2-sweep-v0
256
["selectel-a100-1x-TD3-BC-1202w7","selectel-a100-1x-TD3-BC-3jjt3u","selectel-a100-1x-TD3-BC-6ynrx2","selectel-a100-1x-TD3-BC-77h3sn","selectel-a100-1x-TD3-BC-aamyqj","selectel-a100-1x-TD3-BC-bkhgfz","selectel-a100-1x-TD3-BC-kgakg3","selectel-a100-1x-TD3-BC-sz43es"]
["sac-doup-halfcheetah-expert-v2-005b8c34","sac-doup-halfcheetah-expert-v2-18058f6c","sac-doup-halfcheetah-expert-v2-33f3f312","sac-doup-halfcheetah-expert-v2-5903ae09","sac-doup-halfcheetah-expert-v2-6d7962b3","sac-doup-halfcheetah-expert-v2-8f6e248f","sac-doup-halfcheetah-expert-v2-9c3ccac5","sac-doup-halfcheetah-expert-v2-b580fcae","sac-doup-halfcheetah-expert-v2-d8867c55","sac-doup-halfcheetah-expert-v2-da822dc4"]
0.5
true
false
false
1000
1000
2
0.2
DOUP
0.005
14.5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
adagrad
34m 35s
-
0.001
0.0003
false
256
-
configs/sac-doup/halfcheetah/halfcheetah_random.yaml
0
0.0003
false
halfcheetah-random-v2
10
5
42
0.99
sac-doup-halfcheetah-random-v2-sweep-v0
256
["selectel-a100-1x-TD3-BC-1202w7","selectel-a100-1x-TD3-BC-3jjt3u","selectel-a100-1x-TD3-BC-6ynrx2","selectel-a100-1x-TD3-BC-77h3sn","selectel-a100-1x-TD3-BC-aamyqj","selectel-a100-1x-TD3-BC-bkhgfz","selectel-a100-1x-TD3-BC-kgakg3","selectel-a100-1x-TD3-BC-sz43es"]
["sac-doup-halfcheetah-random-v2-063224d3","sac-doup-halfcheetah-random-v2-0cf1e86e","sac-doup-halfcheetah-random-v2-18069337","sac-doup-halfcheetah-random-v2-25218b92","sac-doup-halfcheetah-random-v2-284150d7","sac-doup-halfcheetah-random-v2-9862e9f1","sac-doup-halfcheetah-random-v2-a877c139","sac-doup-halfcheetah-random-v2-abe32adc","sac-doup-halfcheetah-random-v2-cbd973cd","sac-doup-halfcheetah-random-v2-fff483c0"]
0.5
true
false
false
1000
1000
2
0.2
DOUP
0.005
14.5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
adagrad
34m 4s
-
0.05
0.0003
false
256
-
configs/sac-doup/halfcheetah/halfcheetah_medium_replay.yaml
0
0.0003
false
halfcheetah-medium-replay-v2
10
5
42
0.99
sac-doup-halfcheetah-medium-replay-v2-sweep-v0
256
["selectel-a100-1x-TD3-BC-1202w7","selectel-a100-1x-TD3-BC-3jjt3u","selectel-a100-1x-TD3-BC-6ynrx2","selectel-a100-1x-TD3-BC-77h3sn","selectel-a100-1x-TD3-BC-aamyqj","selectel-a100-1x-TD3-BC-bkhgfz","selectel-a100-1x-TD3-BC-kgakg3","selectel-a100-1x-TD3-BC-sz43es"]
["sac-doup-halfcheetah-medium-replay-v2-252fd8cf","sac-doup-halfcheetah-medium-replay-v2-3e368aba","sac-doup-halfcheetah-medium-replay-v2-4a811e64","sac-doup-halfcheetah-medium-replay-v2-50232642","sac-doup-halfcheetah-medium-replay-v2-54c97ea4","sac-doup-halfcheetah-medium-replay-v2-b434fe25","sac-doup-halfcheetah-medium-replay-v2-ee9d86b8","sac-doup-halfcheetah-medium-replay-v2-f070a0de","sac-doup-halfcheetah-medium-replay-v2-f3695183","sac-doup-halfcheetah-medium-replay-v2-fad90bfc"]
0.5
true
false
false
1000
1000
2
0.2
DOUP
0.005
14.5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
adagrad
35m 4s
-
0.1
0.0003
false
256
-
configs/sac-doup/halfcheetah/halfcheetah_medium_expert.yaml
0
0.0003
false
halfcheetah-medium-expert-v2
10
5
42
0.99
sac-doup-halfcheetah-medium-expert-v2-sweep-v0
256
["selectel-a100-1x-TD3-BC-1202w7","selectel-a100-1x-TD3-BC-3jjt3u","selectel-a100-1x-TD3-BC-6ynrx2","selectel-a100-1x-TD3-BC-77h3sn","selectel-a100-1x-TD3-BC-aamyqj","selectel-a100-1x-TD3-BC-bkhgfz","selectel-a100-1x-TD3-BC-kgakg3","selectel-a100-1x-TD3-BC-sz43es"]
["sac-doup-halfcheetah-medium-expert-v2-3e13fcb5","sac-doup-halfcheetah-medium-expert-v2-410c91e7","sac-doup-halfcheetah-medium-expert-v2-596239c6","sac-doup-halfcheetah-medium-expert-v2-69393785","sac-doup-halfcheetah-medium-expert-v2-917a05cc","sac-doup-halfcheetah-medium-expert-v2-91cc4745","sac-doup-halfcheetah-medium-expert-v2-9a63b0cd","sac-doup-halfcheetah-medium-expert-v2-c922f861","sac-doup-halfcheetah-medium-expert-v2-d0a125c5","sac-doup-halfcheetah-medium-expert-v2-e45b6c64"]
0.5
true
false
false
1000
1000
2
0.2
DOUP
0.005
14.5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Finished
adagrad
34m 18s
-
0.01
0.0003
false
256
-
configs/sac-doup/halfcheetah/halfcheetah_medium.yaml
0
0.0003
false
halfcheetah-medium-v2
10
5
42
0.99
sac-doup-halfcheetah-medium-v2-sweep-v0
256
["selectel-a100-1x-TD3-BC-1202w7","selectel-a100-1x-TD3-BC-3jjt3u","selectel-a100-1x-TD3-BC-6ynrx2","selectel-a100-1x-TD3-BC-77h3sn","selectel-a100-1x-TD3-BC-aamyqj","selectel-a100-1x-TD3-BC-bkhgfz","selectel-a100-1x-TD3-BC-kgakg3","selectel-a100-1x-TD3-BC-sz43es"]
["sac-doup-halfcheetah-medium-v2-186606cf","sac-doup-halfcheetah-medium-v2-33b72a4a","sac-doup-halfcheetah-medium-v2-34ad6754","sac-doup-halfcheetah-medium-v2-38dba74e","sac-doup-halfcheetah-medium-v2-5a0607a7","sac-doup-halfcheetah-medium-v2-6da372a4","sac-doup-halfcheetah-medium-v2-775c7817","sac-doup-halfcheetah-medium-v2-a4e0aec5","sac-doup-halfcheetah-medium-v2-a6f12c47","sac-doup-halfcheetah-medium-v2-b0327f00"]
0.5
true
false
false
1000
1000
2
0.2
DOUP
0.005
14.5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
1-6 of 6


Hopper



Run set
5923


Walker2d


Run set
8358


AntMaze


Run set
8358


Pen


Run set
8358


Door


Run set
8358


Hammer


Run set
8358


Relocate


Run set
8358