2 changes: 1 addition & 1 deletion gcu_objects
20 changes: 11 additions & 9 deletions scripts/rsl_rl/play.py
@@ -71,7 +71,7 @@
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper
from isaaclab_tasks.utils import get_checkpoint_path, parse_env_cfg

from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner
from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner, GCUOnPolicyConv2dPointNetRunner
from rsl_rl.utils import normalize_and_flatten_image_obs


@@ -122,7 +122,7 @@ def main():
env = gym.wrappers.RecordVideo(env, **video_kwargs)

# wrap around environment for rsl-rl
if agent_cfg.policy.class_name == "ActorCriticConv2d":
if agent_cfg.policy.class_name == "ActorCriticConv2d" or agent_cfg.policy.class_name == "ActorCriticConv2dPointNet":
env = RslRlGCUVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
else:
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
@@ -131,6 +131,8 @@ def main():
# load previously trained model
if agent_cfg.policy.class_name == "ActorCriticConv2d":
ppo_runner = GCUOnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
elif agent_cfg.policy.class_name == "ActorCriticConv2dPointNet":
ppo_runner = GCUOnPolicyConv2dPointNetRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
else:
ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
ppo_runner.load(resume_path)
@@ -191,13 +193,13 @@ def main():
print(
"GCU ", env.unwrapped.tote_manager.get_gcu(torch.arange(args_cli.num_envs, device=env.unwrapped.device))
)
# print("\n===== Ejection Summary =====")
# print(f"Total steps: {stats['total_steps']}")
# if ejection_summary != {}:
# for i in range(len(ejection_summary.keys())):
# env_id = list(ejection_summary.keys())[i]
# print(ejection_summary[env_id])
# print("==========================\n")
print("\n===== Ejection Summary =====")
print(f"Total steps: {stats['total_steps']}")
if ejection_summary != {}:
for i in range(len(ejection_summary.keys())):
env_id = list(ejection_summary.keys())[i]
print(ejection_summary[env_id])
print("==========================\n")
# env.unwrapped.bpp.update_container_heightmap(env, torch.arange(args_cli.num_envs).to(env.unwrapped.device), torch.zeros(args_cli.num_envs, device=env.unwrapped.device).int())
# env stepping
obs, _, _, infos = env.step(actions, image_obs=image_obs)
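For reference, the re-enabled summary loop above walks ejection_summary.keys() by index; an equivalent, more direct form simply iterates the dict. A standalone sketch, with made-up stand-ins for the script's stats and ejection_summary:

    # Hypothetical stand-ins for the script's locals, so the sketch runs on its own.
    stats = {"total_steps": 120}
    ejection_summary = {0: "env 0: 3 ejections", 1: "env 1: 1 ejection"}

    print("\n===== Ejection Summary =====")
    print(f"Total steps: {stats['total_steps']}")
    for env_id, summary in ejection_summary.items():
        # same output as looking up ejection_summary[env_id] by key position
        print(summary)
    print("==========================\n")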
6 changes: 4 additions & 2 deletions scripts/rsl_rl/train.py
@@ -95,7 +95,7 @@
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config

from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner
from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner, GCUOnPolicyConv2dPointNetRunner

torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
@@ -164,14 +164,16 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
env = gym.wrappers.RecordVideo(env, **video_kwargs)

# wrap around environment for rsl-rl
if agent_cfg.policy.class_name == "ActorCriticConv2d":
if agent_cfg.policy.class_name == "ActorCriticConv2d" or agent_cfg.policy.class_name == "ActorCriticConv2dPointNet":
env = RslRlGCUVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
else:
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)

# create runner from rsl-rl
if agent_cfg.policy.class_name == "ActorCriticConv2d":
runner = GCUOnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
elif agent_cfg.policy.class_name == "ActorCriticConv2dPointNet":
runner = GCUOnPolicyConv2dPointNetRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
else:
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
# write git state to logs
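Both play.py and train.py pick the environment wrapper and runner from agent_cfg.policy.class_name. A compact sketch of the same dispatch as a lookup table (the make_runner helper is hypothetical, not part of either script):

    from rsl_rl.runners import GCUOnPolicyConv2dPointNetRunner, GCUOnPolicyRunner, OnPolicyRunner

    # Policy class name -> runner class, mirroring the if/elif/else in both scripts.
    RUNNER_BY_POLICY = {
        "ActorCriticConv2d": GCUOnPolicyRunner,
        "ActorCriticConv2dPointNet": GCUOnPolicyConv2dPointNetRunner,
    }

    def make_runner(env, agent_cfg, log_dir, device):
        # Any other policy class falls back to the stock on-policy runner.
        runner_cls = RUNNER_BY_POLICY.get(agent_cfg.policy.class_name, OnPolicyRunner)
        return runner_cls(env, agent_cfg.to_dict(), log_dir=log_dir, device=device)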
12 changes: 6 additions & 6 deletions source/gculab/gculab/envs/manager_based_rl_gcu_env.py
@@ -220,12 +220,12 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
for i in range(wait_time):
self.scene.write_data_to_sim()
self.sim.step(render=False)
if (
self._sim_step_counter % self.cfg.sim.render_interval == 0
and is_rendering
and self.tote_manager.animate
):
self.sim.render()
# if (
# self._sim_step_counter % self.cfg.sim.render_interval == 0
# and is_rendering
# and self.tote_manager.animate
# ):
# self.sim.render()
# update buffers at sim dt - only on last iteration to reduce GPU interface calls
if i == wait_time - 1:
self.scene.update(dt=self.physics_dt)
3 changes: 2 additions & 1 deletion source/gculab_rl/gculab_rl/rsl_rl/__init__.py
@@ -16,4 +16,5 @@
"""

from .gcu_vecenv_wrapper import RslRlGCUVecEnvWrapper
from .rl_cfg import RslRlPpoActorCriticConv2dCfg
from .rl_cfg import RslRlPpoActorCriticConv2dCfg, RslRlGCUPpoAlgorithmCfg, RslRlPpoActorCriticConv2dPointNetCfg
from .exporter import export_policy_as_jit, export_policy_as_onnx
27 changes: 19 additions & 8 deletions source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py
@@ -129,15 +129,18 @@ def _convert_to_pos_quat(self, actions: torch.Tensor, object_to_pack: list) -> t
self.env.unwrapped.tote_manager.get_object_bbox(env_idx, obj_idx)
for env_idx, obj_idx in zip(
torch.arange(actions.shape[0], device=self.env.unwrapped.device),
object_to_pack,
object_to_pack
)
])

rotated_dim = calculate_rotated_bounding_box(bbox_offset, quats, device=self.env.unwrapped.device)
rotated_dim = (
calculate_rotated_bounding_box(
bbox_offset, quats, device=self.env.unwrapped.device
)
)
x_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0]
y_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1]
x = torch.sigmoid(x) * (self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0])
y = torch.sigmoid(y) * (self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1])
x = torch.sigmoid(5 * x) * (self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0])
y = torch.sigmoid(5 * y) * (self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1])

# Compute z analytically for each sample in the batch using multiprocessing
z = torch.zeros_like(x)
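The x/y placement targets above are squashed into the feasible range (tote dimension minus the rotated bounding box) with a sigmoid, and this change raises the gain from 1 to 5 so moderate raw actions can already reach the tote walls. A standalone sketch of the mapping with a made-up feasible range:

    import torch

    def squash_to_range(raw: torch.Tensor, feasible_range: torch.Tensor, gain: float = 5.0) -> torch.Tensor:
        """Map an unbounded raw action into (0, feasible_range) via a sigmoid with the given gain."""
        return torch.sigmoid(gain * raw) * feasible_range

    feasible = torch.tensor([0.40])  # e.g. true_tote_dim / 100 - rotated_dim, in metres (illustrative value)
    for a in (-1.0, 0.0, 1.0):
        x = squash_to_range(torch.tensor([a]), feasible)
        print(f"raw={a:+.1f} -> x={x.item():.3f} m")
    # gain 5: raw = +/-1 maps to ~0.7% / ~99.3% of the range (sigmoid(+/-5));
    # gain 1: the same raw actions only reach ~27% / ~73% of the range.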
@@ -206,9 +209,17 @@ def step(
torch.arange(self.env.unwrapped.num_envs, device=self.env.unwrapped.device),
tote_ids,
)[0]
object_to_pack = [row[0] for row in packable_objects]

# Update FIFO queues with new packable objects
self.env.unwrapped.bpp.update_fifo_queues(packable_objects)

# Select objects using FIFO (First In, First Out) ordering
object_to_pack = self.env.unwrapped.bpp.select_fifo_packable_objects(packable_objects, self.env.unwrapped.device)
# Remove the selected object from the front of the queue
self.env.unwrapped.bpp.remove_selected_from_fifo(object_to_pack)

for i in range(self.env.unwrapped.num_envs):
self.unwrapped.bpp.packed_obj_idx[i].append(torch.tensor([object_to_pack[i].item()]))
self.unwrapped.bpp.packed_obj_idx[i].append(torch.tensor([object_to_pack[i].item()], device=self.env.unwrapped.device))

actions, xy_pos_range, rotated_dim = self._convert_to_pos_quat(actions, object_to_pack)

@@ -217,7 +228,7 @@
actions = torch.cat(
[
tote_ids.unsqueeze(1).to(self.env.unwrapped.device), # Destination tote IDs
torch.tensor(object_to_pack, device=self.env.unwrapped.device).unsqueeze(1), # Object indices
object_to_pack.unsqueeze(1), # Object indices
actions,
],
dim=1,
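The bpp.update_fifo_queues, select_fifo_packable_objects, and remove_selected_from_fifo helpers are not shown in this diff; the sketch below only illustrates the first-in-first-out semantics they suggest (one queue per environment, new packable objects appended, the oldest one selected and then popped). Names, signatures, and shapes here are assumptions.

    import torch
    from collections import deque

    class FifoSelectorSketch:
        """Per-environment FIFO over packable object indices (illustrative only)."""

        def __init__(self, num_envs: int):
            self.queues = [deque() for _ in range(num_envs)]

        def update(self, packable_objects):
            # packable_objects[i]: iterable of object indices currently packable in env i.
            for env_id, objs in enumerate(packable_objects):
                for obj in objs:
                    if obj not in self.queues[env_id]:
                        self.queues[env_id].append(int(obj))

        def select(self, device):
            # Oldest still-queued object per environment.
            return torch.tensor([q[0] for q in self.queues], device=device)

        def remove_selected(self):
            for q in self.queues:
                q.popleft()

    selector = FifoSelectorSketch(num_envs=2)
    selector.update([[3, 5], [7]])
    picked = selector.select(device="cpu")  # tensor([3, 7])
    selector.remove_selected()              # 3 and 7 leave the queues; 5 stays queued in env 0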
87 changes: 87 additions & 0 deletions source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py
@@ -8,7 +8,10 @@

from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlPpoActorCriticCfg
from isaaclab.utils import configclass

from isaaclab_rl.rsl_rl.rnd_cfg import RslRlRndCfg
from isaaclab_rl.rsl_rl.symmetry_cfg import RslRlSymmetryCfg

@configclass
class RslRlPpoActorCriticConv2dCfg(RslRlPpoActorCriticCfg):
@@ -26,3 +29,87 @@ class RslRlPpoActorCriticConv2dCfg(RslRlPpoActorCriticCfg):

conv_linear_output_size: int = 16
"""Output size of the linear layer after the convolutional features are flattened."""

@configclass
class RslRlPpoActorCriticConv2dPointNetCfg(RslRlPpoActorCriticConv2dCfg):
"""Configuration for the PPO actor-critic networks with convolutional layers and PointNet."""

class_name: str = "ActorCriticConv2dPointNet"
"""The policy class name. Default is ActorCriticConv2dPointNet."""

pointnet_layers_params: list[dict] = [
{"out_channels": 64},
{"out_channels": 256},
]
"""List of PointNet layer parameters."""
pointnet_in_dim: int = 8
"""Input dimension for the PointNet."""
pointnet_num_points: int = 512
"""Number of points for the PointNet."""

############################
# Algorithm configurations #
############################


@configclass
class RslRlGCUPpoAlgorithmCfg:
"""Configuration for the PPO algorithm."""

class_name: str = "PPO"
"""The algorithm class name. Default is PPO."""

num_learning_epochs: int = MISSING
"""The number of learning epochs per update."""

num_mini_batches: int = MISSING
"""The number of mini-batches per update."""

learning_rate: float = MISSING
"""The learning rate for the policy."""

schedule: str = MISSING
"""The learning rate schedule."""

gamma: float = MISSING
"""The discount factor."""

lam: float = MISSING
"""The lambda parameter for Generalized Advantage Estimation (GAE)."""

placement_entropy_coef: float = MISSING
"""The coefficient for the placement_entropy_coef loss."""

orientation_entropy_coef: float = MISSING
"""The coefficient for the orientation_entropy_coef loss."""

desired_kl: float = MISSING
"""The desired KL divergence."""

max_grad_norm: float = MISSING
"""The maximum gradient norm."""

value_loss_coef: float = MISSING
"""The coefficient for the value loss."""

use_clipped_value_loss: bool = MISSING
"""Whether to use clipped value loss."""

clip_param: float = MISSING
"""The clipping parameter for the policy."""

normalize_advantage_per_mini_batch: bool = False
"""Whether to normalize the advantage per mini-batch. Default is False.

If True, the advantage is normalized over the mini-batches only.
Otherwise, the advantage is normalized over the entire collected trajectories.
"""

symmetry_cfg: RslRlSymmetryCfg | None = None
"""The symmetry configuration. Default is None, in which case symmetry is not used."""

rnd_cfg: RslRlRndCfg | None = None
"""The configuration for the Random Network Distillation (RND) module. Default is None,
in which case RND is not used.
"""

@@ -30,3 +30,13 @@
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_camera_cfg:NoArmPackPPOCameraRunnerCfg",
},
)

gym.register(
id="Isaac-Pack-NoArm-Camera-Obj-Latent-v0",
entry_point="gculab.envs:ManagerBasedRLGCUEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.joint_pos_camera_env_cfg:NoArmPackCameraObjLatentEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_camera_obj_latent_cfg:NoArmPackPPOCameraObjLatentRunnerCfg",
},
)
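The two kwargs are "module:attribute" entry-point strings that the launch scripts resolve into config classes through Isaac Lab's registry utilities. A minimal sketch of that resolution pattern, using a standard-library target so it runs on its own (resolve_entry_point is a hypothetical helper, not the Isaac Lab API):

    import importlib

    def resolve_entry_point(entry_point: str):
        """Turn a 'package.module:AttrName' string into the referenced object."""
        module_name, attr_name = entry_point.split(":")
        return getattr(importlib.import_module(module_name), attr_name)

    # Standard-library example so the sketch is self-contained:
    print(resolve_entry_point("collections:OrderedDict"))  # <class 'collections.OrderedDict'>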
@@ -5,33 +5,35 @@

from gculab_rl.rsl_rl import RslRlPpoActorCriticConv2dCfg
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoAlgorithmCfg
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg
from gculab_rl.rsl_rl import RslRlGCUPpoAlgorithmCfg


@configclass
class NoArmPackPPOCameraRunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 1500
num_steps_per_env = 4
max_iterations = 3000
save_interval = 10
experiment_name = "no_arm_pack"
empirical_normalization = True
policy = RslRlPpoActorCriticConv2dCfg(
init_noise_std=40.0,
init_noise_std=1.5,
actor_hidden_dims=[128, 128],
critic_hidden_dims=[128, 128],
activation="elu",
conv_layers_params=[
{"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1},
{"out_channels": 8, "kernel_size": 3, "stride": 2},
# {"out_channels": 8, "kernel_size": 3, "stride": 2},
{"out_channels": 16, "kernel_size": 3, "stride": 2},
],
conv_linear_output_size=128, # Project 128×13×10 into 128-dim
)
algorithm = RslRlPpoAlgorithmCfg(
algorithm = RslRlGCUPpoAlgorithmCfg(
value_loss_coef=0.5,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.0025,
placement_entropy_coef=0.0005,
orientation_entropy_coef=0.01,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
@@ -40,4 +42,4 @@ class NoArmPackPPOCameraRunnerCfg(RslRlOnPolicyRunnerCfg):
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
)
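placement_entropy_coef and orientation_entropy_coef take the place of the single entropy_coef in the stock PPO config. The GCU PPO algorithm itself is not part of this diff; as a hedged sketch under that assumption, two separate entropy bonuses would typically enter the loss like this:

    import torch

    def gcu_ppo_loss_sketch(surrogate_loss, value_loss, placement_entropy, orientation_entropy,
                            value_loss_coef=0.5, placement_entropy_coef=0.0005,
                            orientation_entropy_coef=0.01):
        """Illustrative combination only; the real loss lives in the rsl_rl GCU PPO implementation."""
        return (surrogate_loss
                + value_loss_coef * value_loss
                - placement_entropy_coef * placement_entropy.mean()
                - orientation_entropy_coef * orientation_entropy.mean())

    # Toy tensors, just to exercise the signature.
    loss = gcu_ppo_loss_sketch(torch.tensor(0.10), torch.tensor(0.20),
                               torch.rand(8), torch.rand(8))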
@@ -0,0 +1,50 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from gculab_rl.rsl_rl import RslRlPpoActorCriticConv2dPointNetCfg
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoAlgorithmCfg
from gculab_rl.rsl_rl import RslRlGCUPpoAlgorithmCfg

@configclass
class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 4
max_iterations = 3000
save_interval = 10
experiment_name = "no_arm_pack"
empirical_normalization = True
policy = RslRlPpoActorCriticConv2dPointNetCfg(
init_noise_std=1.5,
actor_hidden_dims=[128, 128],
critic_hidden_dims=[128, 128],
activation="elu",
conv_layers_params=[
{"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1},
# {"out_channels": 8, "kernel_size": 3, "stride": 2},
{"out_channels": 16, "kernel_size": 3, "stride": 2},
],
conv_linear_output_size=128, # Project 128×13×10 into 128-dim
pointnet_layers_params=[
{"out_channels": 64},
{"out_channels": 256},
],
pointnet_in_dim=8,
pointnet_num_points=512,
)
algorithm = RslRlGCUPpoAlgorithmCfg(
value_loss_coef=0.5,
use_clipped_value_loss=True,
clip_param=0.2,
placement_entropy_coef=0.0,
orientation_entropy_coef=0.0,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.005,
max_grad_norm=1.0,
)