From fc978745dbf6846be02e752842778413f0049d9a Mon Sep 17 00:00:00 2001 From: Henri Fung Date: Thu, 20 Nov 2025 12:23:22 -0800 Subject: [PATCH 1/5] Working Latent temp --- gcu_objects | 2 +- rsl_rl | 2 +- scripts/rsl_rl/play.py | 24 ++--- scripts/rsl_rl/train.py | 6 +- .../gculab/envs/manager_based_rl_gcu_env.py | 12 +-- source/gculab_rl/gculab_rl/rsl_rl/__init__.py | 2 +- .../gculab_rl/rsl_rl/gcu_vecenv_wrapper.py | 4 +- source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py | 87 +++++++++++++++++++ .../pack/config/no_arm/__init__.py | 10 +++ .../rsl_rl_ppo_camera_obj_latent_cfg.py | 50 +++++++++++ .../config/no_arm/joint_pos_camera_env_cfg.py | 13 +++ .../tasks/manager_based/pack/mdp/rewards.py | 5 ++ .../manager_based/pack/pack_camera_env_cfg.py | 40 +++++++-- .../tasks/manager_based/pack/pack_env_cfg.py | 51 +++++------ .../manager_based/pack/utils/tote_manager.py | 47 ++++++---- 15 files changed, 280 insertions(+), 75 deletions(-) create mode 100644 source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py diff --git a/gcu_objects b/gcu_objects index b2cd22a..a582d59 160000 --- a/gcu_objects +++ b/gcu_objects @@ -1 +1 @@ -Subproject commit b2cd22a08c9af865b5b635d619658d2257714186 +Subproject commit a582d59dda01b85f5290cb692b2644e9c3d53e9c diff --git a/rsl_rl b/rsl_rl index b89619a..52c5bd0 160000 --- a/rsl_rl +++ b/rsl_rl @@ -1 +1 @@ -Subproject commit b89619ad1449d692fe2ac41c55258862be0d691a +Subproject commit 52c5bd047aebf194b8a7784669678edc0e53457c diff --git a/scripts/rsl_rl/play.py b/scripts/rsl_rl/play.py index 99b4b84..1ea7d0b 100644 --- a/scripts/rsl_rl/play.py +++ b/scripts/rsl_rl/play.py @@ -61,17 +61,15 @@ import geodude.tasks # noqa: F401 from gculab_rl.rsl_rl import ( RslRlGCUVecEnvWrapper, - export_policy_as_jit, - export_policy_as_onnx, ) from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent from isaaclab.utils.assets import retrieve_file_path from isaaclab.utils.dict import print_dict from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx from isaaclab_tasks.utils import get_checkpoint_path, parse_env_cfg -from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner +from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner, GCUOnPolicyConv2dPointNetRunner from rsl_rl.utils import normalize_and_flatten_image_obs @@ -122,7 +120,7 @@ def main(): env = gym.wrappers.RecordVideo(env, **video_kwargs) # wrap around environment for rsl-rl - if agent_cfg.policy.class_name == "ActorCriticConv2d": + if agent_cfg.policy.class_name == "ActorCriticConv2d" or agent_cfg.policy.class_name == "ActorCriticConv2dPointNet": env = RslRlGCUVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) else: env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) @@ -131,6 +129,8 @@ def main(): # load previously trained model if agent_cfg.policy.class_name == "ActorCriticConv2d": ppo_runner = GCUOnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + elif agent_cfg.policy.class_name == "ActorCriticConv2dPointNet": + ppo_runner = GCUOnPolicyConv2dPointNetRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) else: ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) ppo_runner.load(resume_path) @@ -191,13 +191,13 @@ def main(): print( 
"GCU ", env.unwrapped.tote_manager.get_gcu(torch.arange(args_cli.num_envs, device=env.unwrapped.device)) ) - # print("\n===== Ejection Summary =====") - # print(f"Total steps: {stats['total_steps']}") - # if ejection_summary != {}: - # for i in range(len(ejection_summary.keys())): - # env_id = list(ejection_summary.keys())[i] - # print(ejection_summary[env_id]) - # print("==========================\n") + print("\n===== Ejection Summary =====") + print(f"Total steps: {stats['total_steps']}") + if ejection_summary != {}: + for i in range(len(ejection_summary.keys())): + env_id = list(ejection_summary.keys())[i] + print(ejection_summary[env_id]) + print("==========================\n") # env.unwrapped.bpp.update_container_heightmap(env, torch.arange(args_cli.num_envs).to(env.unwrapped.device), torch.zeros(args_cli.num_envs, device=env.unwrapped.device).int()) # env stepping obs, _, _, infos = env.step(actions, image_obs=image_obs) diff --git a/scripts/rsl_rl/train.py b/scripts/rsl_rl/train.py index b5326c6..6e280fb 100644 --- a/scripts/rsl_rl/train.py +++ b/scripts/rsl_rl/train.py @@ -95,7 +95,7 @@ from isaaclab_tasks.utils import get_checkpoint_path from isaaclab_tasks.utils.hydra import hydra_task_config -from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner +from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner, GCUOnPolicyConv2dPointNetRunner torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True @@ -164,7 +164,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen env = gym.wrappers.RecordVideo(env, **video_kwargs) # wrap around environment for rsl-rl - if agent_cfg.policy.class_name == "ActorCriticConv2d": + if agent_cfg.policy.class_name == "ActorCriticConv2d" or agent_cfg.policy.class_name == "ActorCriticConv2dPointNet": env = RslRlGCUVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) else: env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) @@ -172,6 +172,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # create runner from rsl-rl if agent_cfg.policy.class_name == "ActorCriticConv2d": runner = GCUOnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + elif agent_cfg.policy.class_name == "ActorCriticConv2dPointNet": + runner = GCUOnPolicyConv2dPointNetRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) else: runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) # write git state to logs diff --git a/source/gculab/gculab/envs/manager_based_rl_gcu_env.py b/source/gculab/gculab/envs/manager_based_rl_gcu_env.py index 2abe9e1..e453f87 100644 --- a/source/gculab/gculab/envs/manager_based_rl_gcu_env.py +++ b/source/gculab/gculab/envs/manager_based_rl_gcu_env.py @@ -220,12 +220,12 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: for i in range(wait_time): self.scene.write_data_to_sim() self.sim.step(render=False) - if ( - self._sim_step_counter % self.cfg.sim.render_interval == 0 - and is_rendering - and self.tote_manager.animate - ): - self.sim.render() + # if ( + # self._sim_step_counter % self.cfg.sim.render_interval == 0 + # and is_rendering + # and self.tote_manager.animate + # ): + # self.sim.render() # update buffers at sim dt - only on last iteration to reduce GPU interface calls if i == wait_time - 1: self.scene.update(dt=self.physics_dt) diff --git a/source/gculab_rl/gculab_rl/rsl_rl/__init__.py b/source/gculab_rl/gculab_rl/rsl_rl/__init__.py index 
4c8ccf5..4b09a40 100644 --- a/source/gculab_rl/gculab_rl/rsl_rl/__init__.py +++ b/source/gculab_rl/gculab_rl/rsl_rl/__init__.py @@ -16,4 +16,4 @@ """ from .gcu_vecenv_wrapper import RslRlGCUVecEnvWrapper -from .rl_cfg import RslRlPpoActorCriticConv2dCfg +from .rl_cfg import RslRlPpoActorCriticConv2dCfg, RslRlGCUPpoAlgorithmCfg, RslRlPpoActorCriticConv2dPointNetCfg diff --git a/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py b/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py index b857e50..bfd4bef 100644 --- a/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py +++ b/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py @@ -136,8 +136,8 @@ def _convert_to_pos_quat(self, actions: torch.Tensor, object_to_pack: list) -> t rotated_dim = calculate_rotated_bounding_box(bbox_offset, quats, device=self.env.unwrapped.device) x_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0] y_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1] - x = torch.sigmoid(x) * (self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0]) - y = torch.sigmoid(y) * (self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1]) + x = torch.sigmoid(5 * x) * (self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0]) + y = torch.sigmoid(5 * y) * (self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1]) # Compute z analytically for each sample in the batch using multiprocessing z = torch.zeros_like(x) diff --git a/source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py b/source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py index 5e8c393..ce009ff 100644 --- a/source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py +++ b/source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py @@ -8,7 +8,10 @@ from isaaclab.utils import configclass from isaaclab_rl.rsl_rl import RslRlPpoActorCriticCfg +from isaaclab.utils import configclass +from isaaclab_rl.rsl_rl.rnd_cfg import RslRlRndCfg +from isaaclab_rl.rsl_rl.symmetry_cfg import RslRlSymmetryCfg @configclass class RslRlPpoActorCriticConv2dCfg(RslRlPpoActorCriticCfg): @@ -26,3 +29,87 @@ class RslRlPpoActorCriticConv2dCfg(RslRlPpoActorCriticCfg): conv_linear_output_size: int = 16 """Output size of the linear layer after the convolutional features are flattened.""" + +@configclass +class RslRlPpoActorCriticConv2dPointNetCfg(RslRlPpoActorCriticConv2dCfg): + """Configuration for the PPO actor-critic networks with convolutional layers and PointNet.""" + + class_name: str = "ActorCriticConv2dPointNet" + """The policy class name. Default is ActorCriticConv2dPointNet.""" + + pointnet_layers_params: list[dict] = [ + {"out_channels": 64}, + {"out_channels": 256}, + ] + """List of PointNet layer parameters.""" + pointnet_in_dim: int = 8 + """Input dimension for the PointNet.""" + pointnet_num_points: int = 512 + """Number of points for the PointNet.""" + +############################ +# Algorithm configurations # +############################ + + +@configclass +class RslRlGCUPpoAlgorithmCfg: + """Configuration for the PPO algorithm.""" + + class_name: str = "PPO" + """The algorithm class name. 
Default is PPO."""
+
+    num_learning_epochs: int = MISSING
+    """The number of learning epochs per update."""
+
+    num_mini_batches: int = MISSING
+    """The number of mini-batches per update."""
+
+    learning_rate: float = MISSING
+    """The learning rate for the policy."""
+
+    schedule: str = MISSING
+    """The learning rate schedule."""
+
+    gamma: float = MISSING
+    """The discount factor."""
+
+    lam: float = MISSING
+    """The lambda parameter for Generalized Advantage Estimation (GAE)."""
+
+    placement_entropy_coef: float = MISSING
+    """The entropy coefficient for the placement distribution."""
+
+    orientation_entropy_coef: float = MISSING
+    """The entropy coefficient for the orientation distribution."""
+
+    desired_kl: float = MISSING
+    """The desired KL divergence."""
+
+    max_grad_norm: float = MISSING
+    """The maximum gradient norm."""
+
+    value_loss_coef: float = MISSING
+    """The coefficient for the value loss."""
+
+    use_clipped_value_loss: bool = MISSING
+    """Whether to use clipped value loss."""
+
+    clip_param: float = MISSING
+    """The clipping parameter for the policy."""
+
+    normalize_advantage_per_mini_batch: bool = False
+    """Whether to normalize the advantage per mini-batch. Default is False.
+
+    If True, the advantage is normalized over the mini-batches only.
+    Otherwise, the advantage is normalized over the entire collected trajectories.
+    """
+
+    symmetry_cfg: RslRlSymmetryCfg | None = None
+    """The symmetry configuration. Default is None, in which case symmetry is not used."""
+
+    rnd_cfg: RslRlRndCfg | None = None
+    """The configuration for the Random Network Distillation (RND) module. Default is None,
+    in which case RND is not used.
+    """
+
diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/__init__.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/__init__.py
index a73635e..21ee668 100644
--- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/__init__.py
+++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/__init__.py
@@ -30,3 +30,13 @@
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_camera_cfg:NoArmPackPPOCameraRunnerCfg",
     },
 )
+
+gym.register(
+    id="Isaac-Pack-NoArm-Camera-Obj-Latent-v0",
+    entry_point="gculab.envs:ManagerBasedRLGCUEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.joint_pos_camera_env_cfg:NoArmPackCameraObjLatentEnvCfg",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_camera_obj_latent_cfg:NoArmPackPPOCameraObjLatentRunnerCfg",
+    },
+)
diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py
new file mode 100644
index 0000000..2decedd
--- /dev/null
+++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from gculab_rl.rsl_rl import RslRlPpoActorCriticConv2dPointNetCfg
+from isaaclab.utils import configclass
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoAlgorithmCfg
+from gculab_rl.rsl_rl import RslRlGCUPpoAlgorithmCfg
+
+@configclass
+class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 4
+    max_iterations = 3000
+    save_interval = 10
+    experiment_name = "no_arm_pack"
+    empirical_normalization = True
+    policy = RslRlPpoActorCriticConv2dPointNetCfg(
+        init_noise_std=1.5,
+        actor_hidden_dims=[128, 128],
+        critic_hidden_dims=[128, 128],
+        activation="elu",
+        conv_layers_params=[
+            {"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1},
+            # {"out_channels": 8, "kernel_size": 3, "stride": 2},
+            {"out_channels": 16, "kernel_size": 3, "stride": 2},
+        ],
+        conv_linear_output_size=128,  # Project 128×13×10 into 256-dim
+        pointnet_layers_params=[
+            {"out_channels": 64},
+            {"out_channels": 256},
+        ],
+        pointnet_in_dim=8,
+        pointnet_num_points=512,
+    )
+    algorithm = RslRlGCUPpoAlgorithmCfg(
+        value_loss_coef=0.5,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        placement_entropy_coef=0.0005,
+        orientation_entropy_coef=0.01,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/joint_pos_camera_env_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/joint_pos_camera_env_cfg.py
index 5fdd1e9..a621e73 100644
--- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/joint_pos_camera_env_cfg.py
+++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/joint_pos_camera_env_cfg.py
@@ -7,6 +7,7 @@
 from isaaclab.utils import configclass
 from geodude.tasks.manager_based.pack.pack_camera_env_cfg import (
     PackDepthCameraEnvCfg,
+    PackDepthCameraObjLatentEnvCfg,
 )
 
 ##
@@ -26,6 +27,18 @@ def __post_init__(self):
         )  # asset name is not used in this env
 
 
+@configclass
+class NoArmPackCameraObjLatentEnvCfg(PackDepthCameraObjLatentEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+
+        self.scene.robot = None
+        self.actions.packing_action = mdp.PackingActionCfg(
+            asset_name="tote1", place_obj_bottomLeft=True
+        )  # asset name is not used in this env
+
+
 @configclass
 class NoArmPackCameraEnvCfg_PLAY(NoArmPackCameraEnvCfg):
     def __post_init__(self):
diff --git a/source/geodude/geodude/tasks/manager_based/pack/mdp/rewards.py b/source/geodude/geodude/tasks/manager_based/pack/mdp/rewards.py
index 53bc27e..55ad86e 100644
--- a/source/geodude/geodude/tasks/manager_based/pack/mdp/rewards.py
+++ b/source/geodude/geodude/tasks/manager_based/pack/mdp/rewards.py
@@ -67,3 +67,8 @@ def orientation_command_error(env: ManagerBasedRLEnv, command_name: str, asset_c
     des_quat_w = quat_mul(asset.data.root_state_w[:, 3:7], des_quat_b)
     curr_quat_w = asset.data.body_state_w[:, asset_cfg.body_ids[0], 3:7]  # type: ignore
     return quat_error_magnitude(curr_quat_w, des_quat_w)
+
+
+def episode_bonus(env: ManagerBasedRLEnv) -> torch.Tensor:
+    """Constant per-environment bonus added at every step of the episode."""
+    return torch.ones(env.num_envs, device=env.device)
\ No newline at end of file
diff --git a/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py
index 963079b..160f166 100644
---
a/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py @@ -76,12 +76,31 @@ class PolicyCfg(ObsGroup): """Observations for policy group.""" # observation terms (order preserved) - # actions = ObsTerm(func=mdp.last_action) - obs_dims = ObsTerm(func=mdp.obs_dims) + # last_action = ObsTerm(func=mdp.last_action) + obs_lookahead = ObsTerm(func=mdp.obs_lookahead, params={"max_objects": 1}) - # def __post_init__(self): - # self.enable_corruption = True - # self.concatenate_terms = True + class SensorCfg(ObsGroup): + """Observations for sensor group.""" + + image = ObsTerm( + func=mdp.image, params={"sensor_cfg": SceneEntityCfg("tiled_camera"), "data_type": "distance_to_camera"} + ) + + # observation groups + policy: PolicyCfg = PolicyCfg() + sensor: SensorCfg = SensorCfg() + + +@configclass +class DepthObservationsObjLatentCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + obs_latents = ObsTerm(func=mdp.obs_latents) class SensorCfg(ObsGroup): """Observations for sensor group.""" @@ -149,6 +168,17 @@ def __post_init__(self): # remove ground as it obstructs the camera # self.scene.ground = None +class PackDepthCameraObjLatentEnvCfg(PackEnvCfg): + """Configuration for the packing environment with depth camera and object latents.""" + + scene: PackDepthCameraSceneCfg = PackDepthCameraSceneCfg(num_envs=512, env_spacing=2.5, replicate_physics=False) + observations: DepthObservationsObjLatentCfg = DepthObservationsObjLatentCfg() + + def __post_init__(self): + super().__post_init__() + # remove ground as it obstructs the camera + self.scene.ground = None + @configclass class PackResNet18DepthCameraEnvCfg(PackDepthCameraEnvCfg): diff --git a/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py index 11eabfa..0bca656 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py @@ -56,24 +56,24 @@ # Define which object IDs to include ycb_include_ids = [ "003", # cracker_box - # "004", # sugar_box - # "006", # mustard_bottle - # "007", # tuna_fish_can - # # "008", # pudding_box - # # "009", # gelatin_box - # # "010", # potted_meat_can - # "011", # banana - # # "024", # bowl - # # "025", # mug - # "036", # wood_block - # # "051", # large_clamp - # # "052", # extra_large_clamp - # # "061", # foam_brick + "004", # sugar_box + "006", # mustard_bottle + "007", # tuna_fish_can + # "008", # pudding_box + # "009", # gelatin_box + # "010", # potted_meat_can + "011", # banana + # "024", # bowl + # "025", # mug + "036", # wood_block + # "051", # large_clamp + # "052", # extra_large_clamp + # "061", # foam_brick ] lw_include_names = [ # "cracker_box", - # "bowl", + "bowl", ] # Filter USD files based on ID prefixes @@ -90,7 +90,7 @@ if base_name in lw_include_names: usd_paths.append(usd_file) -num_object_per_env = 20 +num_object_per_env = 70 # Spacing between totes tote_spacing = 0.43 # width of tote + gap between totes @@ -152,7 +152,7 @@ def __post_init__(self): kinematic_enabled=False, disable_gravity=False, # enable_gyroscopic_forces=True, - solver_position_iteration_count=4, + solver_position_iteration_count=10, solver_velocity_iteration_count=0, sleep_threshold=0.005, stabilization_threshold=0.0025, @@ -195,11 +195,10 @@ class 
PolicyCfg(ObsGroup): # observation terms (order preserved) # actions = ObsTerm(func=mdp.last_action) - obs_dims = ObsTerm(func=mdp.obs_dims) + obs_lookahead = ObsTerm(func=mdp.obs_lookahead, max_objects=1) def __post_init__(self): self.enable_corruption = True - # self.concatenate_terms = True class SensorCfg(ObsGroup): @@ -239,15 +238,13 @@ class EventCfg: @configclass class RewardsCfg: """Reward terms for the MDP.""" + gcu_reward = RewardTerm( + func=mdp.gcu_reward, weight=2700.0 + ) - # gcu_reward = RewardTerm( - # func=mdp.gcu_reward_step, weight=1000.0 - # ) - - object_shift = RewardTerm(func=mdp.object_shift, weight=10.0) - - wasted_volume = RewardTerm(func=mdp.inverse_wasted_volume, weight=40.0) + # object_shift = RewardTerm(func=mdp.object_shift, weight=10.0) + # wasted_volume = RewardTerm(func=mdp.inverse_wasted_volume, weight=40.0) @configclass class TerminationsCfg: @@ -286,7 +283,7 @@ class PackEnvCfg(ManagerBasedRLEnvCfg): """Configuration for the reach end-effector pose tracking environment.""" # Scene settings - scene: PackSceneCfg = PackSceneCfg(num_envs=512, env_spacing=2.5, replicate_physics=False) + scene: PackSceneCfg = PackSceneCfg(num_envs=512, env_spacing=2.5, replicate_physics=False, clone_in_fabric=True) # Basic settings observations: ObservationsCfg = ObservationsCfg() actions: ActionsCfg = ActionsCfg() @@ -310,4 +307,4 @@ def __post_init__(self): self.sim.physx.gpu_max_rigid_patch_count = 4096 * 4096 self.sim.physx.gpu_collision_stack_size = 4096 * 4096 * 20 self.sim.physx.gpu_found_lost_pairs_capacity = 4096 * 4096 * 20 - self.sim.physx.gpu_max_rigid_contact_count = 2**26 + self.sim.physx.gpu_max_rigid_contact_count = 2**26 \ No newline at end of file diff --git a/source/geodude/geodude/tasks/manager_based/pack/utils/tote_manager.py b/source/geodude/geodude/tasks/manager_based/pack/utils/tote_manager.py index 979a7f1..af396d4 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/utils/tote_manager.py +++ b/source/geodude/geodude/tasks/manager_based/pack/utils/tote_manager.py @@ -233,7 +233,18 @@ def get_object_bboxes_batch(self, env_ids, obj_indices): bboxes[i, j] = self.get_object_bbox(env_idx, obj_idx) return bboxes - def get_object_latents_batch(self, env_ids, obj_indices): + def set_object_latents(self, obj_latents, env_ids): + """ + Set object latents for specified environments. + + Args: + obj_latents: Tensor of object latents + env_ids: Environment IDs to update + """ + self.obj_latents[env_ids] = obj_latents + + + def set_object_voxels(self, obj_voxels): """ Get object latents for a batch of environments and objects. 
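Note on the hunk above: get_object_latents_batch is removed in favor of setters, so anything that still needs batched reads (for example the new obs_latents observation term) has to gather from self.obj_latents directly. A minimal sketch of such a getter, assuming obj_latents is laid out as (num_envs, num_objects, latent_dim); neither that layout nor this helper appears in the series, it is purely illustrative:

import torch

def get_object_latents_batch(self, env_ids: torch.Tensor, obj_indices: torch.Tensor) -> torch.Tensor:
    """Gather latents for a batch of (environment, object) pairs.

    env_ids: (B,) environment indices; obj_indices: (B, K) object indices.
    Returns a (B, K, latent_dim) tensor.
    """
    # Advanced indexing does in one shot what get_object_bboxes_batch
    # above does with a nested Python loop: the (B, 1) env index
    # broadcasts against the (B, K) object indices.
    return self.obj_latents[env_ids.unsqueeze(1), obj_indices]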
@@ -494,23 +505,23 @@ def eject_totes(self, tote_ids, env_ids, is_dest=True, overfill_check=True, heig self.tote_keys, ) - # # Log destination tote ejections - overfilled_totes = torch.zeros((self.num_envs, self.num_totes), dtype=torch.bool, device=self.env.device) - overfilled_totes[env_ids[overfilled_envs], tote_ids[overfilled_envs]] = True - overfilled_totes = overfilled_totes[env_ids] - outbound_gcus = self.get_gcu(env_ids) - if self.log_stats: - if is_dest: - # for env_idx, problem in zip(env_ids[overfilled_envs].tolist(), [self.env.unwrapped.bpp.problems[i.item()] for i in env_ids[overfilled_envs]]): - # print("logging dest tote for env_idx:", env_idx) - # self.env.unwrapped.bpp.update_container_heightmap( - # self.env, torch.tensor([env_idx], device=self.env.device), torch.zeros((self.num_envs), device=self.env.device).int() - # ) - # self.stats.log_container(env_idx, problem.container) - self.stats.log_dest_tote_ejection(tote_ids[overfilled_envs], env_ids[overfilled_envs]) - self.stats.log_tote_eject_gcus( - torch.zeros_like(outbound_gcus), outbound_gcus, totes_ejected=overfilled_totes - ) + # Log destination tote ejections + # overfilled_totes = torch.zeros((self.num_envs, self.num_totes), dtype=torch.bool, device=self.env.device) + # overfilled_totes[env_ids[overfilled_envs], tote_ids[overfilled_envs]] = True + # overfilled_totes = overfilled_totes[env_ids] + # outbound_gcus = self.get_gcu(env_ids) + # if self.log_stats: + # if is_dest: + # # for env_idx, problem in zip(env_ids[overfilled_envs].tolist(), [self.env.unwrapped.bpp.problems[i.item()] for i in env_ids[overfilled_envs]]): + # # print("logging dest tote for env_idx:", env_idx) + # # self.env.unwrapped.bpp.update_container_heightmap( + # # self.env, torch.tensor([env_idx], device=self.env.device), torch.zeros((self.num_envs), device=self.env.device).int() + # # ) + # # self.stats.log_container(env_idx, problem.container) + # self.stats.log_dest_tote_ejection(tote_ids[overfilled_envs], env_ids[overfilled_envs]) + # self.stats.log_tote_eject_gcus( + # torch.zeros_like(outbound_gcus), outbound_gcus, totes_ejected=overfilled_totes + # ) assets_to_eject = [] for env_id, tote_id in zip(env_ids[overfilled_envs], tote_ids[overfilled_envs]): From a94247c68dd5823ffb8fe94480708cc22366ffcf Mon Sep 17 00:00:00 2001 From: Henri Fung Date: Thu, 20 Nov 2025 13:44:33 -0800 Subject: [PATCH 2/5] Test Settings --- .../no_arm/agents/rsl_rl_ppo_camera_cfg.py | 20 +++--- .../rsl_rl_ppo_camera_obj_latent_cfg.py | 1 - .../tasks/manager_based/pack/mdp/events.py | 21 +++--- .../manager_based/pack/pack_camera_env_cfg.py | 2 - .../tasks/manager_based/pack/pack_env_cfg.py | 69 ++++++++++++------- 5 files changed, 64 insertions(+), 49 deletions(-) diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py index 01efa4e..5de41a8 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py @@ -5,33 +5,35 @@ from gculab_rl.rsl_rl import RslRlPpoActorCriticConv2dCfg from isaaclab.utils import configclass -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoAlgorithmCfg +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg +from gculab_rl.rsl_rl import RslRlGCUPpoAlgorithmCfg @configclass class NoArmPackPPOCameraRunnerCfg(RslRlOnPolicyRunnerCfg): - 
num_steps_per_env = 24 - max_iterations = 1500 + num_steps_per_env = 4 + max_iterations = 3000 save_interval = 10 experiment_name = "no_arm_pack" - empirical_normalization = True + empirical_normalization = False policy = RslRlPpoActorCriticConv2dCfg( - init_noise_std=40.0, + init_noise_std=1.5, actor_hidden_dims=[128, 128], critic_hidden_dims=[128, 128], activation="elu", conv_layers_params=[ {"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1}, - {"out_channels": 8, "kernel_size": 3, "stride": 2}, + # {"out_channels": 8, "kernel_size": 3, "stride": 2}, {"out_channels": 16, "kernel_size": 3, "stride": 2}, ], conv_linear_output_size=128, # Project 128×13×10 into 128-dim ) - algorithm = RslRlPpoAlgorithmCfg( + algorithm = RslRlGCUPpoAlgorithmCfg( value_loss_coef=0.5, use_clipped_value_loss=True, clip_param=0.2, - entropy_coef=0.0025, + placement_entropy_coef=0.0005, + orientation_entropy_coef=0.01, num_learning_epochs=5, num_mini_batches=4, learning_rate=1.0e-3, @@ -40,4 +42,4 @@ class NoArmPackPPOCameraRunnerCfg(RslRlOnPolicyRunnerCfg): lam=0.95, desired_kl=0.01, max_grad_norm=1.0, - ) + ) \ No newline at end of file diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py index 2decedd..7450d1a 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py @@ -22,7 +22,6 @@ class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg): activation="elu", conv_layers_params=[ {"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1}, - # {"out_channels": 8, "kernel_size": 3, "stride": 2}, {"out_channels": 16, "kernel_size": 3, "stride": 2}, ], conv_linear_output_size=128, # Project 128×13×10 into 256-dim diff --git a/source/geodude/geodude/tasks/manager_based/pack/mdp/events.py b/source/geodude/geodude/tasks/manager_based/pack/mdp/events.py index 11a2dab..020f380 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/mdp/events.py +++ b/source/geodude/geodude/tasks/manager_based/pack/mdp/events.py @@ -244,9 +244,7 @@ def load_latents(asset_path): # Compute properties only once per unique asset path if asset_path not in mesh_properties_cache: - print("asset_path", asset_path) bbox = compute_mesh_bbox(mesh) * scale - print("bbox: ", bbox) vox = compute_voxelized_geometry_usd(mesh, bbox, scale=scale) volume = mesh_volume(mesh) * (scale**3) latents = load_latents(asset_path) @@ -731,7 +729,7 @@ def inverse_wasted_volume(env: ManagerBasedRLGCUEnv, gamma=0.99): return inverse_wasted_volume -def wasted_volume_pbrs(env: ManagerBasedRLGCUEnv, gamma=0.99): +def wasted_volume_pbrs(env: ManagerBasedRLGCUEnv, gamma = 0.99): """ Computes the wasted volume in the tote, defined as 1 - (% top down volume - GCU of objects). 1 - (% top down volume - GCU of objects). 
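A note before the next hunk: wasted_volume_pbrs implements potential-based reward shaping (Ng et al., 1999), where the reward is the discounted difference of a potential between consecutive states, F(s, s') = gamma * phi(s') - phi(s), which leaves the optimal policy unchanged. A minimal sketch of the pattern as rearranged below, with phi standing in for the objects_volume / top_down_volumes ratio; the helper itself is illustrative, not part of the patch:

import torch

def pbrs_step(
    phi_next: torch.Tensor, last_phi: torch.Tensor, reset_mask: torch.Tensor, gamma: float = 0.99
) -> tuple[torch.Tensor, torch.Tensor]:
    """One shaping step: F(s, s') = gamma * phi(s') - phi(s)."""
    # Zero the potential of freshly reset envs before differencing,
    # mirroring the reordering the hunk below makes, so the stored
    # potential starts each new episode at zero.
    phi_next = torch.where(reset_mask, torch.zeros_like(phi_next), phi_next)
    shaping = gamma * phi_next - last_phi
    # The caller persists phi_next as the new last_phi for the next
    # step (env.tote_manager.last_pbrs in the patch).
    return shaping, phi_next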
@@ -750,18 +748,19 @@ def wasted_volume_pbrs(env: ManagerBasedRLGCUEnv, gamma=0.99): top_down_volumes = torch.sum(top_down_volumes_, dim=(1, 2)) # Sum over heightmap dimensions top_down_volumes = (top_down_volumes / total_volume).squeeze(1) - objects_volume = env.tote_manager.stats.recent_gcu_values[ - torch.arange(env.num_envs, device=env.device), env.tote_manager.dest_totes - ] - inverse_wasted_volume = objects_volume / (top_down_volumes + 1e-6) - pbrs = gamma * inverse_wasted_volume - last_pbrs - env.tote_manager.last_pbrs = inverse_wasted_volume + objects_volume = ( + env.tote_manager.stats.recent_gcu_values[ + torch.arange(env.num_envs, device=env.device), env.tote_manager.dest_totes + ] + ) + inverse_wasted_volume = objects_volume / (top_down_volumes + 1e-9) if env.tote_manager.reset_pbrs.any(): - env.tote_manager.last_pbrs[env.tote_manager.reset_pbrs] = 0 + inverse_wasted_volume[env.tote_manager.reset_pbrs] = 0 env.tote_manager.reset_pbrs[env.tote_manager.reset_pbrs] = False + pbrs = gamma * inverse_wasted_volume - last_pbrs + env.tote_manager.last_pbrs = inverse_wasted_volume return pbrs - def object_overfilled_tote(env: ManagerBasedRLGCUEnv): """Checks if any object is overfilled the tote. Args: diff --git a/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py index 160f166..05c238e 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py @@ -74,9 +74,7 @@ class DepthObservationsCfg: @configclass class PolicyCfg(ObsGroup): """Observations for policy group.""" - # observation terms (order preserved) - # last_action = ObsTerm(func=mdp.last_action) obs_lookahead = ObsTerm(func=mdp.obs_lookahead, params={"max_objects": 1}) class SensorCfg(ObsGroup): diff --git a/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py index 0bca656..37a4963 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py @@ -20,6 +20,7 @@ from isaaclab.managers import ObservationGroupCfg as ObsGroup from isaaclab.managers import ObservationTermCfg as ObsTerm from isaaclab.managers import RewardTermCfg as RewardTerm +from isaaclab.managers import CurriculumTermCfg as CurriculumTerm from isaaclab.managers import SceneEntityCfg from isaaclab.managers import TerminationTermCfg as DoneTerm from isaaclab.scene import InteractiveSceneCfg @@ -55,25 +56,41 @@ # Define which object IDs to include ycb_include_ids = [ - "003", # cracker_box - "004", # sugar_box - "006", # mustard_bottle - "007", # tuna_fish_can - # "008", # pudding_box - # "009", # gelatin_box - # "010", # potted_meat_can - "011", # banana - # "024", # bowl - # "025", # mug - "036", # wood_block - # "051", # large_clamp - # "052", # extra_large_clamp - # "061", # foam_brick + # "003", # cracker_box + # "004", # sugar_box + # "006", # mustard_bottle + # "007", # tuna_fish_can + # # "008", # pudding_box + # # "009", # gelatin_box + # # "010", # potted_meat_can + # "011", # banana + # # "024", # bowl + # # "025", # mug + # "036", # wood_block + # # "051", # large_clamp + # # "052", # extra_large_clamp + # # "061", # foam_brick ] lw_include_names = [ - # "cracker_box", + "cracker_box", + "banana", "bowl", + "sugar_box", + "mustard_bottle", + "tuna_fish_can", + "wood_block", + "tomato_soup_can", + 
"tennis_ball", + "rubiks_cube", + # "Rope" + "pudding_box", + "potted_meat_can", + # "plate", + "mug", + # "mini_soccer_ball", + "master_chef_can", + "chips_can_berkeley_meshes", ] # Filter USD files based on ID prefixes @@ -90,7 +107,7 @@ if base_name in lw_include_names: usd_paths.append(usd_file) -num_object_per_env = 70 +num_object_per_env = 80 # Spacing between totes tote_spacing = 0.43 # width of tote + gap between totes @@ -152,8 +169,8 @@ def __post_init__(self): kinematic_enabled=False, disable_gravity=False, # enable_gyroscopic_forces=True, - solver_position_iteration_count=10, - solver_velocity_iteration_count=0, + # solver_position_iteration_count=10, + # solver_velocity_iteration_count=0, sleep_threshold=0.005, stabilization_threshold=0.0025, # max_depenetration_velocity=1000.0, @@ -195,10 +212,12 @@ class PolicyCfg(ObsGroup): # observation terms (order preserved) # actions = ObsTerm(func=mdp.last_action) - obs_lookahead = ObsTerm(func=mdp.obs_lookahead, max_objects=1) + obs_dims = ObsTerm(func=mdp.obs_dims) + # obs_lookahead = ObsTerm(func=mdp.obs_lookahead) def __post_init__(self): self.enable_corruption = True + # self.concatenate_terms = True class SensorCfg(ObsGroup): @@ -239,12 +258,14 @@ class EventCfg: class RewardsCfg: """Reward terms for the MDP.""" gcu_reward = RewardTerm( - func=mdp.gcu_reward, weight=2700.0 + func=mdp.gcu_reward, weight=500.0 ) # object_shift = RewardTerm(func=mdp.object_shift, weight=10.0) - # wasted_volume = RewardTerm(func=mdp.inverse_wasted_volume, weight=40.0) + episode_bonus = RewardTerm(func=mdp.episode_bonus, weight=10.0) + unused_phi_s = RewardTerm(func=mdp.inverse_wasted_volume, weight=0.0) # For logging only + potential_function_F = RewardTerm(func=mdp.wasted_volume_pbrs, weight=10.0) @configclass class TerminationsCfg: @@ -258,11 +279,7 @@ class TerminationsCfg: @configclass class CurriculumCfg: """Curriculum terms for the MDP.""" - pass - # object_shift = CurriculumTerm( - # func=mdp.modify_reward_weight, params={"term_name": "object_shift", "weight": 50.0, "num_steps": 10000} - # ) @configclass @@ -303,7 +320,7 @@ def __post_init__(self): self.episode_length_s = 10.0 self.viewer.eye = (0, 0.1, 5.5) # simulation settings - self.sim.dt = 1.0 / 90.0 + self.sim.dt = 1.0 / 60.0 self.sim.physx.gpu_max_rigid_patch_count = 4096 * 4096 self.sim.physx.gpu_collision_stack_size = 4096 * 4096 * 20 self.sim.physx.gpu_found_lost_pairs_capacity = 4096 * 4096 * 20 From 90d9fec986f51807cbd0b8cd1a8483a4bbfb0e91 Mon Sep 17 00:00:00 2001 From: Henri Fung Date: Sun, 23 Nov 2025 12:27:49 -0800 Subject: [PATCH 3/5] Fixed gcu to use fifo --- rsl_rl | 2 +- .../gculab_rl/rsl_rl/gcu_vecenv_wrapper.py | 23 ++++++++++++++----- .../no_arm/agents/rsl_rl_ppo_camera_cfg.py | 2 +- .../rsl_rl_ppo_camera_obj_latent_cfg.py | 9 ++++---- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/rsl_rl b/rsl_rl index 52c5bd0..9ca962b 160000 --- a/rsl_rl +++ b/rsl_rl @@ -1 +1 @@ -Subproject commit 52c5bd047aebf194b8a7784669678edc0e53457c +Subproject commit 9ca962baf942152a7bf6de285c833df654e17dda diff --git a/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py b/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py index bfd4bef..08cc755 100644 --- a/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py +++ b/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py @@ -129,11 +129,14 @@ def _convert_to_pos_quat(self, actions: torch.Tensor, object_to_pack: list) -> t self.env.unwrapped.tote_manager.get_object_bbox(env_idx, obj_idx) for env_idx, obj_idx in zip( 
torch.arange(actions.shape[0], device=self.env.unwrapped.device), - object_to_pack, + object_to_pack ) ]) - - rotated_dim = calculate_rotated_bounding_box(bbox_offset, quats, device=self.env.unwrapped.device) + rotated_dim = ( + calculate_rotated_bounding_box( + bbox_offset, quats, device=self.env.unwrapped.device + ) + ) x_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0] y_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1] x = torch.sigmoid(5 * x) * (self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0]) @@ -206,9 +209,17 @@ def step( torch.arange(self.env.unwrapped.num_envs, device=self.env.unwrapped.device), tote_ids, )[0] - object_to_pack = [row[0] for row in packable_objects] + + # Update FIFO queues with new packable objects + self.env.unwrapped.bpp.update_fifo_queues(packable_objects) + + # Select objects using FIFO (First In, First Out) ordering + object_to_pack = self.env.unwrapped.bpp.select_fifo_packable_objects(packable_objects, self.env.unwrapped.device) + # Remove the selected object from the front of the queue + self.env.unwrapped.bpp.remove_selected_from_fifo(object_to_pack) + for i in range(self.env.unwrapped.num_envs): - self.unwrapped.bpp.packed_obj_idx[i].append(torch.tensor([object_to_pack[i].item()])) + self.unwrapped.bpp.packed_obj_idx[i].append(torch.tensor([object_to_pack[i].item()], device=self.env.unwrapped.device)) actions, xy_pos_range, rotated_dim = self._convert_to_pos_quat(actions, object_to_pack) @@ -217,7 +228,7 @@ def step( actions = torch.cat( [ tote_ids.unsqueeze(1).to(self.env.unwrapped.device), # Destination tote IDs - torch.tensor(object_to_pack, device=self.env.unwrapped.device).unsqueeze(1), # Object indices + object_to_pack.unsqueeze(1), # Object indices actions, ], dim=1, diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py index 5de41a8..e90a7c4 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py @@ -15,7 +15,7 @@ class NoArmPackPPOCameraRunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 3000 save_interval = 10 experiment_name = "no_arm_pack" - empirical_normalization = False + empirical_normalization = True policy = RslRlPpoActorCriticConv2dCfg( init_noise_std=1.5, actor_hidden_dims=[128, 128], diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py index 7450d1a..b4683e1 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py @@ -22,9 +22,10 @@ class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg): activation="elu", conv_layers_params=[ {"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1}, + # {"out_channels": 8, "kernel_size": 3, "stride": 2}, {"out_channels": 16, "kernel_size": 3, "stride": 2}, ], - conv_linear_output_size=128, # Project 128×13×10 into 256-dim + conv_linear_output_size=128, # Project 128×13×10 into 128-dim pointnet_layers_params=[ {"out_channels": 64}, {"out_channels": 
256}, @@ -36,14 +37,14 @@ class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg): value_loss_coef=0.5, use_clipped_value_loss=True, clip_param=0.2, - placement_entropy_coef=0.0005, - orientation_entropy_coef=0.01, + placement_entropy_coef=0.0, + orientation_entropy_coef=0.005, num_learning_epochs=5, num_mini_batches=4, learning_rate=1.0e-3, schedule="adaptive", gamma=0.99, lam=0.95, - desired_kl=0.01, + desired_kl=0.03, max_grad_norm=1.0, ) From 72f5932a4127bc65a5b63c24a4c796806dc52457 Mon Sep 17 00:00:00 2001 From: Henri Fung Date: Sun, 23 Nov 2025 13:37:20 -0800 Subject: [PATCH 4/5] Fix exporter to use gculab's --- scripts/rsl_rl/play.py | 4 +++- source/gculab_rl/gculab_rl/rsl_rl/__init__.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/rsl_rl/play.py b/scripts/rsl_rl/play.py index 1ea7d0b..eab160d 100644 --- a/scripts/rsl_rl/play.py +++ b/scripts/rsl_rl/play.py @@ -61,12 +61,14 @@ import geodude.tasks # noqa: F401 from gculab_rl.rsl_rl import ( RslRlGCUVecEnvWrapper, + export_policy_as_jit, + export_policy_as_onnx, ) from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent from isaaclab.utils.assets import retrieve_file_path from isaaclab.utils.dict import print_dict from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper from isaaclab_tasks.utils import get_checkpoint_path, parse_env_cfg from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner, GCUOnPolicyConv2dPointNetRunner diff --git a/source/gculab_rl/gculab_rl/rsl_rl/__init__.py b/source/gculab_rl/gculab_rl/rsl_rl/__init__.py index 4b09a40..be0af8a 100644 --- a/source/gculab_rl/gculab_rl/rsl_rl/__init__.py +++ b/source/gculab_rl/gculab_rl/rsl_rl/__init__.py @@ -17,3 +17,4 @@ from .gcu_vecenv_wrapper import RslRlGCUVecEnvWrapper from .rl_cfg import RslRlPpoActorCriticConv2dCfg, RslRlGCUPpoAlgorithmCfg, RslRlPpoActorCriticConv2dPointNetCfg +from .exporter import export_policy_as_jit, export_policy_as_onnx \ No newline at end of file From 8f1f25fd52403bd8bb086aded778ac734ff751ba Mon Sep 17 00:00:00 2001 From: Henri Fung Date: Tue, 25 Nov 2025 20:57:51 -0800 Subject: [PATCH 5/5] Adjust settings --- .../config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py index b4683e1..9faecfd 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py @@ -38,13 +38,13 @@ class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg): use_clipped_value_loss=True, clip_param=0.2, placement_entropy_coef=0.0, - orientation_entropy_coef=0.005, + orientation_entropy_coef=0.0, num_learning_epochs=5, num_mini_batches=4, learning_rate=1.0e-3, schedule="adaptive", gamma=0.99, lam=0.95, - desired_kl=0.03, + desired_kl=0.005, max_grad_norm=1.0, )
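A closing note on patch 3: the wrapper now defers object selection to three BPP-side helpers (update_fifo_queues, select_fifo_packable_objects, remove_selected_from_fifo) instead of always taking row[0] of the packable list, so queued objects are packed in arrival order. Those helpers live in the rsl_rl submodule bump and are not shown in this series; a minimal sketch of the first-in-first-out bookkeeping they imply, with the container types and all internals assumed:

from collections import deque

import torch


class FifoSelector:
    """Per-environment FIFO queues over packable object indices."""

    def __init__(self, num_envs: int):
        self.queues = [deque() for _ in range(num_envs)]

    def update_fifo_queues(self, packable_objects: list[list[int]]) -> None:
        # Enqueue newly packable objects in arrival order, skipping
        # indices that are already queued.
        for env_id, objs in enumerate(packable_objects):
            for obj in objs:
                if obj not in self.queues[env_id]:
                    self.queues[env_id].append(obj)

    def select_fifo_packable_objects(self, packable_objects: list[list[int]], device: str) -> torch.Tensor:
        # Pick the oldest queued object that is still packable in each
        # env; update_fifo_queues has already run, so one always exists.
        picks = [
            next(obj for obj in self.queues[env_id] if obj in objs)
            for env_id, objs in enumerate(packable_objects)
        ]
        return torch.tensor(picks, device=device)

    def remove_selected_from_fifo(self, selected: torch.Tensor) -> None:
        # Pop each env's chosen object so the next call moves on.
        for env_id, obj in enumerate(selected.tolist()):
            self.queues[env_id].remove(obj)

This matches the call order in gcu_vecenv_wrapper.py: update, select, then remove, with the selected tensor also appended (now created on the right device) to bpp.packed_obj_idx.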