
Commit c13e705

Merge pull request #32 from Emerge-Lab/hr_rl
Update main
2 parents ae2a064 + a3134f9

20 files changed: 13805 additions & 6291 deletions

.gitignore

Lines changed: 2 additions & 1 deletion
@@ -73,6 +73,7 @@ temp/
 # Waymo dataset
 data/train_no_tl/
 data/valid_no_tl/
+data/test_no_tl/
 
 # Logging output
 logs/
@@ -87,4 +88,4 @@ paper/
 
 # Videos and scene info dicts
 videos/
-scene_info/
+scene_info/

configs/bc_config.yaml

Lines changed: 3 additions & 3 deletions
@@ -2,8 +2,8 @@
 save_model: true # Save model after training
 model_name: human_policy # Name of saved model
 save_model_path: ./models/il # Path to save model
+save_data_path: ./data_il/train_no_tl # Path to save training data
 
 # Train
-total_samples: 80_000 # Number of obs-act-next_obs-done pairs to generate
-n_epochs: 20 # Training epochs
-net_arch: [256, 128] # Network architecture
+total_samples: 2_000_000 # Number of obs-act-next_obs-done pairs to generate
+net_arch: [128, 64] # Network architecture
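The retuned BC settings trade a much larger dataset (2M samples instead of 80K) for a smaller network ([128, 64] instead of [256, 128]), and the n_epochs key is dropped. As a rough illustration of how these keys could be consumed, here is a minimal sketch; only the YAML keys come from the diff, while the loader, dimensions, and Tanh activations are assumptions:

```python
# Minimal sketch of consuming configs/bc_config.yaml. Only the YAML keys come
# from the diff; the dimensions and activation choice are illustrative guesses.
import yaml
import torch.nn as nn

with open("configs/bc_config.yaml") as f:
    cfg = yaml.safe_load(f)

def build_mlp(in_dim, out_dim, net_arch):
    """Stack Linear+Tanh layers following net_arch, e.g. [128, 64]."""
    layers, last = [], in_dim
    for hidden in net_arch:
        layers += [nn.Linear(last, hidden), nn.Tanh()]
        last = hidden
    layers.append(nn.Linear(last, out_dim))
    return nn.Sequential(*layers)

# Hypothetical observation/action sizes, just to make the sketch runnable.
policy = build_mlp(in_dim=64, out_dim=63, net_arch=cfg["net_arch"])
print(policy)
```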

configs/env_config.yaml

Lines changed: 5 additions & 5 deletions
@@ -15,7 +15,7 @@ dt: 0.1
 sims_per_step: 10
 discretize_actions: true
 include_head_angle: false # Whether to include the head tilt/angle as part of a vehicle's action
-accel_discretization: 15
+accel_discretization: 21
 accel_lower_bound: -4.0 # decelerate
 accel_upper_bound: 4.0 # accelerate
 steering_lower_bound: -0.3 # steer right
@@ -88,18 +88,18 @@ normalize_state: true
 # Ego feature names + max values in each category
 ego_state_feat_min: -30
 ego_state_feat_max:
-  veh_len: 16
-  veh_width: 4
+  veh_len: 25
+  veh_width: 5
   speed: 100
-  target_dist: 300
+  target_dist: 350
   target_azimuth: 3.14
   target_heading: 3.14
   rel_target_speed_dist: 40
   curr_accel: 5 # Vehicle acceleration
   curr_steering: 3
   curr_head_angle: 0.00001 # Not used at the moment
 
-vis_obs_max: 100 # The maximum value across visible state elements
+vis_obs_max: 110 # The maximum value across visible state elements
 vis_obs_min: -10 # The minimum value across visible state elements
 
 # # # # Agent settings # # # #
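The acceleration grid widens from 15 to 21 bins over the same [-4, 4] range, shrinking the step from roughly 0.57 to 0.4, and the normalization bounds grow to accommodate longer vehicles and more distant goals. A small sketch of how these values plausibly translate into an action grid and a min-max normalizer (the env's actual implementation may differ):

```python
# Sketch only: how the discretization and normalization settings in
# env_config.yaml could be used. The real Nocturne env code may differ.
import numpy as np

# 21 evenly spaced accelerations over [-4, 4] -> step of 0.4, with 0.0 on the grid.
accel_grid = np.linspace(-4.0, 4.0, 21)
assert np.isclose(accel_grid, 0.0).any()

# Min-max normalization of the visible state using the updated bounds.
vis_obs_min, vis_obs_max = -10.0, 110.0

def normalize_vis_obs(obs):
    """Scale visible-state features into [0, 1]."""
    return (np.asarray(obs) - vis_obs_min) / (vis_obs_max - vis_obs_min)

print(accel_grid[:3], normalize_vis_obs([0.0, 100.0]))
```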

configs/exp_config.yaml

Lines changed: 5 additions & 4 deletions
@@ -1,9 +1,9 @@
 project: new_env
-group: fixed_experts
+group: base_S100_A1
 env_id: Nocturne
 seed: 42
 track_wandb: true
-wandb_init_videos: ['expert']
+wandb_init_videos: []
 where_am_i: headless_machine # Change to "headless_machine" when you're on a cluster or desktop
 exp_name: Nocturne
 verbose: 0
@@ -18,7 +18,7 @@ ma_callback:
 save_video: true
 model_save_freq: 250 # In iterations (one iter ~ (num_agents x n_steps))
 record_n_scenes: 10 # Number of different scenes to render
-video_save_freq: 20 # Make a video every k iterations (100 iters ~ 1M steps)
+video_save_freq: 50_000 # Make a video every k iterations (100 iters ~ 1M steps)
 video_deterministic: true
 eval_freq: 100 # Evaluate full RL task in deterministic mode (turn off intermediate goals)
 
@@ -35,7 +35,8 @@ learn:
 
 # human-regularized RL
 reg_weight: 0.0
-human_policy_path: models/il/human_policy_D403_S500_02_08_21_30.pt
+human_policy_path: models/il/human_policy_D651_S500_02_18_20_05_AV_ONLY.pt
+reg_weight_decay_schedule: None
 
 # Model arch
 model_config:
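The experiment now points at a reference policy trained on AV-only data and introduces a reg_weight_decay_schedule key, suggesting the regularization weight can be annealed over training. For intuition, a hedged sketch of how reg_weight typically enters a human-regularized PPO objective, written here as a KL penalty toward the frozen human policy; the repo's exact loss term is not shown in this diff:

```python
# Illustrative only: one common form of human-regularized PPO, where the agent
# is penalized for diverging from a frozen human (BC) policy. The actual loss
# used in this repo may be defined differently.
import torch
import torch.nn.functional as F

def hr_ppo_loss(ppo_loss, agent_logits, human_logits, reg_weight=0.01):
    # KL(pi_agent || pi_human) over the discrete action distribution.
    kl = F.kl_div(
        F.log_softmax(human_logits, dim=-1),  # input: log pi_human
        F.log_softmax(agent_logits, dim=-1),  # target: log pi_agent
        log_target=True,
        reduction="batchmean",
    )
    return ppo_loss + reg_weight * kl

loss = hr_ppo_loss(torch.tensor(0.5), torch.randn(8, 63), torch.randn(8, 63))
```

With reg_weight: 0.0, as set above, the term vanishes and training reduces to plain PPO.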

configs/model_config.yaml

Lines changed: 27 additions & 9 deletions
@@ -1,42 +1,60 @@
 # Base paths
 bc_models_dir: models/il
-hr_ppo_models_dir_self_play: models/hr_rl/self_play
+hr_ppo_models_dir_self_play: models/hr_rl/self_play_0221
 
 # The human reference policy used for HR-PPO
 used_human_policy:
-- name: human_policy_D403_S500_02_08_21_30
+- name: human_policy_D651_S500_02_18_20_05_AV_ONLY
   agent: BC
   train_agent: '-'
   wandb_run: '-'
 
 # HR-PPO models
 best_overall_models:
-- name: policy_L0.0_S100_I1750.zip
+- name: policy_L0.0_S100_I2500.zip
   agent: PPO
   reg_weight: 0.0
   train_agent: Self-play
   wandb_run:
 
-- name: policy_L0.01_S100_I2071.zip
+- name: policy_L0.01_S100_I2579.zip
   reg_weight: 0.01
   agent: HR-PPO
   train_agent: Self-play
   wandb_run:
 
+- name: policy_L0.02_S100_I2585.zip
+  reg_weight: 0.02
+  agent: HR-PPO
+  train_agent: Self-play
+  wandb_run:
+
+- name: policy_L0.03_S100_I2611.zip
+  reg_weight: 0.03
+  agent: HR-PPO
+  train_agent: Self-play
+  wandb_run:
+
+- name: policy_L0.04_S100_I2500.zip
+  reg_weight: 0.04
+  agent: HR-PPO
+  train_agent: Self-play
+  wandb_run:
+
 - name: policy_L0.05_S100_I2000.zip
   reg_weight: 0.05
   agent: HR-PPO
   train_agent: Self-play
   wandb_run:
 
-- name: policy_L0.005_S100_I2059.zip
-  reg_weight: 0.005
+- name: policy_L0.1_S100_I2000.zip
+  reg_weight: 0.1
   agent: HR-PPO
   train_agent: Self-play
   wandb_run:
 
-- name: policy_L0.025_S100_I2109.zip
-  reg_weight: 0.025
+- name: policy_L0.2_S100_I2000.zip
+  reg_weight: 0.2
   agent: HR-PPO
   train_agent: Self-play
-  wandb_run:
+  wandb_run:
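The registry now sweeps reg_weight from 0.0 (plain PPO) through 0.2, replacing the earlier 0.005/0.025 checkpoints. A sketch of how such a registry might be walked to load checkpoints for evaluation; the SB3-style PPO.load call for the .zip policies is an assumption, the keys and paths mirror the YAML above:

```python
# Sketch: walking configs/model_config.yaml to load evaluation checkpoints.
# Keys/paths mirror the YAML above; the stable-baselines3 load call is assumed.
import yaml
from stable_baselines3 import PPO

with open("configs/model_config.yaml") as f:
    models_config = yaml.safe_load(f)

base_dir = models_config["hr_ppo_models_dir_self_play"]
for entry in models_config["best_overall_models"]:
    policy = PPO.load(f"{base_dir}/{entry['name']}")
    print(f"{entry['agent']:>7} reg_weight={entry.get('reg_weight')} -> {entry['name']}")
```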

configs/model_quality.yaml

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+# Base paths
+bc_models_dir:
+hr_ppo_models_dir_self_play:
+
+# The human reference policy used for HR-PPO
+human_policies:
+- name:
+  agent: BC
+  train_agent: '-'
+  wandb_run: '-'
+
+# HR-PPO models
+hr_ppo_models:
+- name:
+  agent: PPO
+  reg_weight: 0.0
+  train_agent: Self-play
+  wandb_run:
+
+

evaluation/gen_res_x_intersection_df.py

Lines changed: 3 additions & 3 deletions
@@ -58,8 +58,8 @@ def gen_and_save_res_df(
     eval_dataset = data_sets[dataset]
     scene_to_paths_dict = intersection_dicts[dataset] if intersection_dicts is not None else None
 
-    if num_controlled_agents >= 50:
-        eval_episodes = num_scenes_to_select_from
+    if num_controlled_agents > 1:
+        eval_episodes = num_scenes_to_select_from
     else:
         eval_episodes = num_eval_episodes
 
@@ -154,7 +154,7 @@ def gen_and_save_res_df(
 # Generate dataframe
 gen_and_save_res_df(
     num_scenes_to_select_from=100,
-    num_eval_episodes=1000,
+    num_eval_episodes=4000,
     env_config=env_config,
     intersection_dicts={'Train': train_scene_to_paths_dict},
     model_config=models_config,
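The multi-agent branch now triggers for any run with more than one controlled agent (previously only for 50+), so every multi-agent evaluation covers each candidate scene exactly once, while single-agent runs sample num_eval_episodes episodes (raised here from 1000 to 4000). Distilled into a standalone helper for clarity; the parameter names follow the diff, the helper itself is illustrative:

```python
def pick_eval_episodes(num_controlled_agents, num_scenes_to_select_from, num_eval_episodes):
    """Episode-count rule after this change: one pass per scene for
    multi-agent evaluation, sampled episodes for single-agent runs."""
    if num_controlled_agents > 1:
        return num_scenes_to_select_from
    return num_eval_episodes

assert pick_eval_episodes(50, 100, 4000) == 100   # multi-agent: one pass per scene
assert pick_eval_episodes(1, 100, 4000) == 4000   # single-agent: sampled episodes
```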
