-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfigs.yaml
More file actions
200 lines (184 loc) · 6.5 KB
/
configs.yaml
File metadata and controls
200 lines (184 loc) · 6.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
defaults:
  # Dubins car environment parameters
  speed: 1.0
  turnRate: 1.25
  x_min: -1.5
  x_max: 1.5
  y_min: -1.5
  y_max: 1.5
  buffer: 0.1
  dt: 0.05
  obs_x: 0
  obs_y: 0
  obs_r: 0.5
  # offline data parameters
  data_length: 100
  num_trajs: 4000  # 2000 in paper
  num_train_trajs: 3800  # 1900 in paper
  size: [128, 128]
  dataset_path: 'wm_demos_dubins_sc_F_arrow_0.15_128.pkl'  # '128' should be the same as the size above
  # offline dreamer parameters, these are from https://github.com/NM512/dreamerv3_torch
  wm_name: ''
  logdir: 'logs/dreamer_dubins'
  rssm_ckpt_path: logs/dreamer_dubins/rssm_ckpt.pt  # change this to whatever the path to your rssm checkpoint is
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  deterministic_run: false
  steps: 1e6
  parallel: false
  eval_every: 5e2
  eval_episode_num: 10
  log_every: 5e2
  reset_every: 0
  device: 'cuda:0'
  compile: true
  precision: 32
  debug: false
  video_pred_log: true
  rssm_train_steps: 10000  # 100000 in paper
  # Environment
  task: 'dubins-wm'
  envs: 1
  action_repeat: 1
  time_limit: 100
  grayscale: false
  prefill: 5000
  reward_EMA: true
  # Model
  dyn_hidden: 512
  dyn_deter: 512
  dyn_stoch: 32
  dyn_discrete: 0  # 0 for continuous latent
  dyn_rec_depth: 1
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  grad_heads: ['decoder', 'cont']  # , 'margin']
  units: 512
  act: 'SiLU'
  norm: true
  encoder:
    {mlp_keys: 'obs_state', cnn_keys: 'image', act: 'SiLU', norm: true, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, symlog_inputs: true}
    # {mlp_keys: '', cnn_keys: 'image', act: 'SiLU', norm: true, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, symlog_inputs: true}
  decoder:
    {mlp_keys: 'obs_state', cnn_keys: 'image', act: 'SiLU', norm: true, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, cnn_sigmoid: false, image_dist: mse, vector_dist: symlog_mse, outscale: 1.0}
    # {mlp_keys: '', cnn_keys: 'image', act: 'SiLU', norm: true, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, cnn_sigmoid: false, image_dist: mse, vector_dist: symlog_mse, outscale: 1.0}
  actor:
    {layers: 2, dist: 'normal', entropy: 3e-4, unimix_ratio: 0.01, std: 'learned', min_std: 0.1, max_std: 1.0, temp: 0.1, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 1.0}
  critic:
    {layers: 2, dist: 'symlog_disc', slow_target: true, slow_target_update: 1, slow_target_fraction: 0.02, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 0.0}
  # reward_head:
  #   {layers: 2, dist: 'symlog_disc', loss_scale: 1.0, outscale: 0.0}
  cont_head:
    {layers: 2, loss_scale: 1.0, outscale: 1.0}
  # new
  margin_head:
    {layers: 2, loss_scale: 10.0}
  gamma_lx: 0.75
  dyn_scale: 0.5
  rep_scale: 0.1
  kl_free: 1.0
  weight_decay: 0.0
  unimix_ratio: 0.01
  initial: 'learned'
  # Dataset generation
  show_constraint: false
  constraint_color: 'red'
  arrow_size: 0.15  # Smaller is bigger here
  # Training
  batch_size: 16
  batch_length: 64
  train_ratio: 512
  pretrain: 100
  model_lr: 1e-4
  obs_lr: 1e-3
  lx_lr: 1e-4
  opt_eps: 1e-8
  grad_clip: 1000
  dataset_size: 1000000
  opt: 'adam'
  # Behavior.
  discount: 0.997
  discount_lambda: 0.95
  imag_horizon: 15
  imag_gradient: 'dynamics'
  imag_gradient_mix: 0.0
  eval_state_mean: false
  # Exploration
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: true
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400
  disag_action_cond: false
  # LCRL
  reward-threshold: null
  # seed: 0
  buffer-size: 1e6  # type=float, default=40000)
  actor-lr: 1e-4
  critic-lr: 1e-3
  gamma-pyhj: 0.9999  # type=float, default=0.95)
  tau: 0.005  # type=float, default=0.005)
  exploration-noise: 0.1  # type=float, default=0.1)
  epoch: 1  # type=int, default=10)
  warmup: 1  # number of epochs for warm up
  total-episodes: 20  # type=int, default=160)
  step-per-epoch: 40000  # type=int, default=40000)
  step-per-collect: 16  # type=int, default=8)
  update-per-step: 0.125  # type=float, default=0.125)
  batch_size-pyhj: 512  # type=int, default=512)
  control-net: [512, 512, 512, 512]  # type=int, nargs='*', default=None) # for control policy
  critic-net: [512, 512, 512, 512]  # type=int, nargs='*', default=None) # for critic net
  constraint-embedding-dim: 512
  pass-semantic-constraint: true
  pass-semantic-state: false
  control-net-const: []  # type=int, nargs='*', default=(128, 128)
  training-num: 16  # type=int, default=8)
  test-num: 1  # type=int, default=100)
  render: 0.  # type=float, default=0.)
  rew-norm: false  # action='store_true', default=False)
  n-step: 1  # type=int, default=1)
  continue-training-logdir: null  # type=str, default=None)
  continue-training-epoch: null  # type=int, default=None)
  actor-gradient-steps: 1  # type=int, default=1)
  is-game-baseline: false  # type=bool, default=False) # it will be set automatically
  target-update-freq: 400  # type=int, default=400)
  auto-alpha: 1
  alpha-lr: 3e-4
  alpha: 0.2
  weight-decay-pyhj: 0.001
  env-dist-type: ds  # 'v' - vanilla, 'fc' - four corners, 'fcfe' - four corners four edges, 'rh' - right half, 'br' - big radii, 'uni' - uniform, 'rhfe' - right half four edges, 'rhbr' - right half big radii
  safety-margin-type: 'cos_sim'  # cos_sim, learned
  safety-margin-threshold: -0.64  # type=float, default=0.5)
  safety-margin-hard-threshold: false  # type=bool, default=False)
  pass-prototype: false  # type=bool, default=False) # whether to pass a prototype to the policy net, not the raw constraint
  safety_filter_eps: 2.0
  actor-activation: 'ReLU'  # type=str, default='ReLU')
  critic-activation: 'ReLU'  # type=str, default='ReLU')
  kwargs: {}  # type=str, default='{}')
  warm-start-path: null  # type=str, default=None) # e.g., log/ra_droneracing_Game-v6/epoch_id_10/policy.pth
  nx: 51
  ny: 51
  nt: 51
  # Semantic encoder training
  sem:
    sz_embedding: 512  # Size of embedding that is appended to backbone model.
    sz_batch: 150  # Number of samples per batch.
    nb_epochs: 60  # Number of training epochs.
    gpu_id: 0  # ID of GPU that is used for training.
    nb_workers: 4  # Number of workers for dataloader.
    loss: 'priv'  # Criterion for training
    optimizer: 'adamw'  # Optimizer setting
    lr: 1e-4  # Learning rate setting
    weight_decay: 1e-4  # Weight decay setting
    lr_decay_step: 10  # Learning decay step setting
    lr_decay_gamma: 0.5  # Learning decay gamma setting
    save_model: true  # Whether to save the trained model