-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathconfigs.yaml
More file actions
163 lines (151 loc) · 4.58 KB
/
configs.yaml
File metadata and controls
163 lines (151 loc) · 4.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
defaults:
  # --- Dubins car environment parameters ---
  speed: 1.0
  turnRate: 1.25
  x_min: -1.5
  x_max: 1.5
  y_min: -1.5
  y_max: 1.5
  buffer: 0.1
  dt: 0.05
  # obstacle (circle) position and radius
  obs_x: 0
  obs_y: 0
  obs_r: 0.5
  # --- offline data parameters ---
  data_length: 100
  num_trajs: 4000 # 2000 in paper
  num_train_trajs: 3800 # 1900 in paper
  size: [128, 128]
  dataset_path: "wm_demos128.pkl" # "128" should be the same as the size above
  # --- offline dreamer parameters, from https://github.com/NM512/dreamerv3-torch ---
  logdir: "logs/dreamer_dubins"
  rssm_ckpt_path: logs/dreamer_dubins/rssm_ckpt.pt # change this to whatever the path to your rssm checkpoint is
  traindir: null
  evaldir: null
  offline_traindir: ""
  offline_evaldir: ""
  seed: 0
  deterministic_run: false
  # NOTE(review): scientific-notation scalars like 1e6 / 5e2 are resolved as
  # *strings* by YAML 1.1 loaders (e.g. PyYAML); the dreamerv3-torch config
  # loader converts them — keep them in this form.
  steps: 1e6
  parallel: false
  eval_every: 5e2
  eval_episode_num: 10
  log_every: 5e2
  reset_every: 0
  device: "cuda:0"
  compile: true
  precision: 32
  debug: false
  video_pred_log: true
  rssm_train_steps: 10000 # 100000 in paper
  # --- Environment ---
  task: "dubins-wm"
  envs: 1
  action_repeat: 1
  time_limit: 100
  grayscale: false
  prefill: 5000
  reward_EMA: true
  # --- Model ---
  dyn_hidden: 512
  dyn_deter: 512
  dyn_stoch: 32
  dyn_discrete: 0 # 0 for continuous latent
  dyn_rec_depth: 1
  dyn_mean_act: "none"
  dyn_std_act: "sigmoid2"
  dyn_min_std: 0.1
  grad_heads: ["decoder", "margin", "cont"]
  units: 512
  act: "SiLU"
  norm: true
  encoder:
    {mlp_keys: "obs_state", cnn_keys: "image", act: "SiLU", norm: true, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, symlog_inputs: true}
  decoder:
    {mlp_keys: "obs_state", cnn_keys: "image", act: "SiLU", norm: true, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, cnn_sigmoid: false, image_dist: mse, vector_dist: symlog_mse, outscale: 1.0}
  actor:
    {layers: 2, dist: "normal", entropy: 3e-4, unimix_ratio: 0.01, std: "learned", min_std: 0.1, max_std: 1.0, temp: 0.1, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 1.0}
  critic:
    {layers: 2, dist: "symlog_disc", slow_target: true, slow_target_update: 1, slow_target_fraction: 0.02, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 0.0}
  # reward head disabled — "margin" is trained instead (see grad_heads above)
  # reward_head:
  #   {layers: 2, dist: "symlog_disc", loss_scale: 1.0, outscale: 0.0}
  cont_head:
    {layers: 2, loss_scale: 1.0, outscale: 1.0}
  # new
  margin_head:
    {layers: 2, loss_scale: 10.0}
  gamma_lx: 0.75
  dyn_scale: 0.5
  rep_scale: 0.1
  kl_free: 1.0
  weight_decay: 0.0
  unimix_ratio: 0.01
  initial: "learned"
  # --- Training ---
  batch_size: 16
  batch_length: 64
  train_ratio: 512
  pretrain: 100
  model_lr: 1e-4
  obs_lr: 1e-3
  lx_lr: 1e-4
  opt_eps: 1e-8
  grad_clip: 1000
  dataset_size: 1000000
  opt: "adam"
  # --- Behavior ---
  discount: 0.997
  discount_lambda: 0.95
  imag_horizon: 15
  imag_gradient: "dynamics"
  imag_gradient_mix: 0.0
  eval_state_mean: false
  # --- Exploration ---
  expl_behavior: "greedy"
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: "stoch"
  disag_log: true
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400
  disag_action_cond: false
  # --- LCRL ---
  # NOTE(review): kebab-case keys below appear to mirror argparse flags
  # (inline comments record the original type/default) — do not rename.
  reward-threshold: null
  #seed: 0
  buffer-size: 40000
  actor-lr: 1e-4
  critic-lr: 1e-3
  gamma-pyhj: 0.9999 # type=float, default=0.95)
  tau: 0.005 # type=float, default=0.005)
  exploration-noise: 0.1 # type=float, default=0.1)
  epoch: 1 # type=int, default=10)
  total-episodes: 15 # type=int, default=160)
  step-per-epoch: 40000 # type=int, default=40000)
  step-per-collect: 8 # type=int, default=8)
  update-per-step: 0.125 # type=float, default=0.125)
  batch_size-pyhj: 512 # type=int, default=512)
  control-net: [128, 128, 128] # type=int, nargs="*", default=None) # for control policy
  critic-net: [128, 128, 128] # type=int, nargs="*", default=None) # for critic net
  training-num: 1 # type=int, default=8)
  test-num: 1 # type=int, default=100)
  render: 0.0 # type=float, default=0.)
  rew-norm: false # action="store_true", default=False)
  n-step: 1 # type=int, default=1)
  continue-training-logdir: null # type=str, default=None)
  continue-training-epoch: null # type=int, default=None)
  actor-gradient-steps: 1 # type=int, default=1)
  is-game-baseline: false # type=bool, default=False) # it will be set automatically
  target-update-freq: 400 # type=int, default=400)
  auto-alpha: 1
  alpha-lr: 3e-4
  alpha: 0.2
  weight-decay-pyhj: 0.001
  actor-activation: "ReLU" # type=str, default="ReLU")
  critic-activation: "ReLU" # type=str, default="ReLU")
  kwargs: {} # type=str, default="{}")
  warm-start-path: null # type=str, default=None) # e.g., log/ra_droneracing_Game-v6/epoch_id_10/policy.pth
  # grid resolution for value-function evaluation
  nx: 31
  ny: 31