From fc978745dbf6846be02e752842778413f0049d9a Mon Sep 17 00:00:00 2001 From: Henri Fung Date: Thu, 20 Nov 2025 12:23:22 -0800 Subject: [PATCH 1/5] Working Latent temp --- gcu_objects | 2 +- rsl_rl | 2 +- scripts/rsl_rl/play.py | 24 ++--- scripts/rsl_rl/train.py | 6 +- .../gculab/envs/manager_based_rl_gcu_env.py | 12 +-- source/gculab_rl/gculab_rl/rsl_rl/__init__.py | 2 +- .../gculab_rl/rsl_rl/gcu_vecenv_wrapper.py | 4 +- source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py | 87 +++++++++++++++++++ .../pack/config/no_arm/__init__.py | 10 +++ .../rsl_rl_ppo_camera_obj_latent_cfg.py | 50 +++++++++++ .../config/no_arm/joint_pos_camera_env_cfg.py | 13 +++ .../tasks/manager_based/pack/mdp/rewards.py | 5 ++ .../manager_based/pack/pack_camera_env_cfg.py | 40 +++++++-- .../tasks/manager_based/pack/pack_env_cfg.py | 51 +++++------ .../manager_based/pack/utils/tote_manager.py | 47 ++++++---- 15 files changed, 280 insertions(+), 75 deletions(-) create mode 100644 source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py diff --git a/gcu_objects b/gcu_objects index b2cd22a..a582d59 160000 --- a/gcu_objects +++ b/gcu_objects @@ -1 +1 @@ -Subproject commit b2cd22a08c9af865b5b635d619658d2257714186 +Subproject commit a582d59dda01b85f5290cb692b2644e9c3d53e9c diff --git a/rsl_rl b/rsl_rl index b89619a..52c5bd0 160000 --- a/rsl_rl +++ b/rsl_rl @@ -1 +1 @@ -Subproject commit b89619ad1449d692fe2ac41c55258862be0d691a +Subproject commit 52c5bd047aebf194b8a7784669678edc0e53457c diff --git a/scripts/rsl_rl/play.py b/scripts/rsl_rl/play.py index 99b4b84..1ea7d0b 100644 --- a/scripts/rsl_rl/play.py +++ b/scripts/rsl_rl/play.py @@ -61,17 +61,15 @@ import geodude.tasks # noqa: F401 from gculab_rl.rsl_rl import ( RslRlGCUVecEnvWrapper, - export_policy_as_jit, - export_policy_as_onnx, ) from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent from isaaclab.utils.assets import retrieve_file_path from isaaclab.utils.dict import print_dict from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx from isaaclab_tasks.utils import get_checkpoint_path, parse_env_cfg -from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner +from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner, GCUOnPolicyConv2dPointNetRunner from rsl_rl.utils import normalize_and_flatten_image_obs @@ -122,7 +120,7 @@ def main(): env = gym.wrappers.RecordVideo(env, **video_kwargs) # wrap around environment for rsl-rl - if agent_cfg.policy.class_name == "ActorCriticConv2d": + if agent_cfg.policy.class_name == "ActorCriticConv2d" or agent_cfg.policy.class_name == "ActorCriticConv2dPointNet": env = RslRlGCUVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) else: env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) @@ -131,6 +129,8 @@ def main(): # load previously trained model if agent_cfg.policy.class_name == "ActorCriticConv2d": ppo_runner = GCUOnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + elif agent_cfg.policy.class_name == "ActorCriticConv2dPointNet": + ppo_runner = GCUOnPolicyConv2dPointNetRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) else: ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) ppo_runner.load(resume_path) @@ -191,13 +191,13 @@ def main(): print( 
"GCU ", env.unwrapped.tote_manager.get_gcu(torch.arange(args_cli.num_envs, device=env.unwrapped.device)) ) - # print("\n===== Ejection Summary =====") - # print(f"Total steps: {stats['total_steps']}") - # if ejection_summary != {}: - # for i in range(len(ejection_summary.keys())): - # env_id = list(ejection_summary.keys())[i] - # print(ejection_summary[env_id]) - # print("==========================\n") + print("\n===== Ejection Summary =====") + print(f"Total steps: {stats['total_steps']}") + if ejection_summary != {}: + for i in range(len(ejection_summary.keys())): + env_id = list(ejection_summary.keys())[i] + print(ejection_summary[env_id]) + print("==========================\n") # env.unwrapped.bpp.update_container_heightmap(env, torch.arange(args_cli.num_envs).to(env.unwrapped.device), torch.zeros(args_cli.num_envs, device=env.unwrapped.device).int()) # env stepping obs, _, _, infos = env.step(actions, image_obs=image_obs) diff --git a/scripts/rsl_rl/train.py b/scripts/rsl_rl/train.py index b5326c6..6e280fb 100644 --- a/scripts/rsl_rl/train.py +++ b/scripts/rsl_rl/train.py @@ -95,7 +95,7 @@ from isaaclab_tasks.utils import get_checkpoint_path from isaaclab_tasks.utils.hydra import hydra_task_config -from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner +from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner, GCUOnPolicyConv2dPointNetRunner torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True @@ -164,7 +164,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen env = gym.wrappers.RecordVideo(env, **video_kwargs) # wrap around environment for rsl-rl - if agent_cfg.policy.class_name == "ActorCriticConv2d": + if agent_cfg.policy.class_name == "ActorCriticConv2d" or agent_cfg.policy.class_name == "ActorCriticConv2dPointNet": env = RslRlGCUVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) else: env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) @@ -172,6 +172,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen # create runner from rsl-rl if agent_cfg.policy.class_name == "ActorCriticConv2d": runner = GCUOnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + elif agent_cfg.policy.class_name == "ActorCriticConv2dPointNet": + runner = GCUOnPolicyConv2dPointNetRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) else: runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) # write git state to logs diff --git a/source/gculab/gculab/envs/manager_based_rl_gcu_env.py b/source/gculab/gculab/envs/manager_based_rl_gcu_env.py index 2abe9e1..e453f87 100644 --- a/source/gculab/gculab/envs/manager_based_rl_gcu_env.py +++ b/source/gculab/gculab/envs/manager_based_rl_gcu_env.py @@ -220,12 +220,12 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: for i in range(wait_time): self.scene.write_data_to_sim() self.sim.step(render=False) - if ( - self._sim_step_counter % self.cfg.sim.render_interval == 0 - and is_rendering - and self.tote_manager.animate - ): - self.sim.render() + # if ( + # self._sim_step_counter % self.cfg.sim.render_interval == 0 + # and is_rendering + # and self.tote_manager.animate + # ): + # self.sim.render() # update buffers at sim dt - only on last iteration to reduce GPU interface calls if i == wait_time - 1: self.scene.update(dt=self.physics_dt) diff --git a/source/gculab_rl/gculab_rl/rsl_rl/__init__.py b/source/gculab_rl/gculab_rl/rsl_rl/__init__.py index 
4c8ccf5..4b09a40 100644 --- a/source/gculab_rl/gculab_rl/rsl_rl/__init__.py +++ b/source/gculab_rl/gculab_rl/rsl_rl/__init__.py @@ -16,4 +16,4 @@ """ from .gcu_vecenv_wrapper import RslRlGCUVecEnvWrapper -from .rl_cfg import RslRlPpoActorCriticConv2dCfg +from .rl_cfg import RslRlPpoActorCriticConv2dCfg, RslRlGCUPpoAlgorithmCfg, RslRlPpoActorCriticConv2dPointNetCfg diff --git a/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py b/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py index b857e50..bfd4bef 100644 --- a/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py +++ b/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py @@ -136,8 +136,8 @@ def _convert_to_pos_quat(self, actions: torch.Tensor, object_to_pack: list) -> t rotated_dim = calculate_rotated_bounding_box(bbox_offset, quats, device=self.env.unwrapped.device) x_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0] y_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1] - x = torch.sigmoid(x) * (self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0]) - y = torch.sigmoid(y) * (self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1]) + x = torch.sigmoid(5 * x) * (self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0]) + y = torch.sigmoid(5 * y) * (self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1]) # Compute z analytically for each sample in the batch using multiprocessing z = torch.zeros_like(x) diff --git a/source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py b/source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py index 5e8c393..ce009ff 100644 --- a/source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py +++ b/source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py @@ -8,7 +8,10 @@ from isaaclab.utils import configclass from isaaclab_rl.rsl_rl import RslRlPpoActorCriticCfg +from isaaclab.utils import configclass +from isaaclab_rl.rsl_rl.rnd_cfg import RslRlRndCfg +from isaaclab_rl.rsl_rl.symmetry_cfg import RslRlSymmetryCfg @configclass class RslRlPpoActorCriticConv2dCfg(RslRlPpoActorCriticCfg): @@ -26,3 +29,87 @@ class RslRlPpoActorCriticConv2dCfg(RslRlPpoActorCriticCfg): conv_linear_output_size: int = 16 """Output size of the linear layer after the convolutional features are flattened.""" + +@configclass +class RslRlPpoActorCriticConv2dPointNetCfg(RslRlPpoActorCriticConv2dCfg): + """Configuration for the PPO actor-critic networks with convolutional layers and PointNet.""" + + class_name: str = "ActorCriticConv2dPointNet" + """The policy class name. Default is ActorCriticConv2dPointNet.""" + + pointnet_layers_params: list[dict] = [ + {"out_channels": 64}, + {"out_channels": 256}, + ] + """List of PointNet layer parameters.""" + pointnet_in_dim: int = 8 + """Input dimension for the PointNet.""" + pointnet_num_points: int = 512 + """Number of points for the PointNet.""" + +############################ +# Algorithm configurations # +############################ + + +@configclass +class RslRlGCUPpoAlgorithmCfg: + """Configuration for the PPO algorithm.""" + + class_name: str = "PPO" + """The algorithm class name. 
Default is PPO."""
+
+    num_learning_epochs: int = MISSING
+    """The number of learning epochs per update."""
+
+    num_mini_batches: int = MISSING
+    """The number of mini-batches per update."""
+
+    learning_rate: float = MISSING
+    """The learning rate for the policy."""
+
+    schedule: str = MISSING
+    """The learning rate schedule."""
+
+    gamma: float = MISSING
+    """The discount factor."""
+
+    lam: float = MISSING
+    """The lambda parameter for Generalized Advantage Estimation (GAE)."""
+
+    placement_entropy_coef: float = MISSING
+    """The entropy coefficient for the placement distribution."""
+
+    orientation_entropy_coef: float = MISSING
+    """The entropy coefficient for the orientation distribution."""
+
+    desired_kl: float = MISSING
+    """The desired KL divergence."""
+
+    max_grad_norm: float = MISSING
+    """The maximum gradient norm."""
+
+    value_loss_coef: float = MISSING
+    """The coefficient for the value loss."""
+
+    use_clipped_value_loss: bool = MISSING
+    """Whether to use clipped value loss."""
+
+    clip_param: float = MISSING
+    """The clipping parameter for the policy."""
+
+    normalize_advantage_per_mini_batch: bool = False
+    """Whether to normalize the advantage per mini-batch. Default is False.
+
+    If True, the advantage is normalized over the mini-batches only.
+    Otherwise, the advantage is normalized over the entire collected trajectories.
+    """
+
+    symmetry_cfg: RslRlSymmetryCfg | None = None
+    """The symmetry configuration. Default is None, in which case symmetry is not used."""
+
+    rnd_cfg: RslRlRndCfg | None = None
+    """The configuration for the Random Network Distillation (RND) module. Default is None,
+    in which case RND is not used.
+    """
+
diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/__init__.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/__init__.py
index a73635e..21ee668 100644
--- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/__init__.py
+++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/__init__.py
@@ -30,3 +30,13 @@
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_camera_cfg:NoArmPackPPOCameraRunnerCfg",
     },
 )
+
+gym.register(
+    id="Isaac-Pack-NoArm-Camera-Obj-Latent-v0",
+    entry_point="gculab.envs:ManagerBasedRLGCUEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.joint_pos_camera_env_cfg:NoArmPackCameraObjLatentEnvCfg",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_camera_obj_latent_cfg:NoArmPackPPOCameraObjLatentRunnerCfg",
+    },
+)
diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py
new file mode 100644
index 0000000..2decedd
--- /dev/null
+++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from gculab_rl.rsl_rl import RslRlPpoActorCriticConv2dPointNetCfg
+from isaaclab.utils import configclass
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoAlgorithmCfg
+from gculab_rl.rsl_rl import RslRlGCUPpoAlgorithmCfg
+
+@configclass
+class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 4
+    max_iterations = 3000
+    save_interval = 10
+    experiment_name = "no_arm_pack"
+    empirical_normalization = True
+    policy = RslRlPpoActorCriticConv2dPointNetCfg(
+        init_noise_std=1.5,
+        actor_hidden_dims=[128, 128],
+        critic_hidden_dims=[128, 128],
+        activation="elu",
+        conv_layers_params=[
+            {"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1},
+            # {"out_channels": 8, "kernel_size": 3, "stride": 2},
+            {"out_channels": 16, "kernel_size": 3, "stride": 2},
+        ],
+        conv_linear_output_size=128,  # Project 128×13×10 into 256-dim
+        pointnet_layers_params=[
+            {"out_channels": 64},
+            {"out_channels": 256},
+        ],
+        pointnet_in_dim=8,
+        pointnet_num_points=512,
+    )
+    algorithm = RslRlGCUPpoAlgorithmCfg(
+        value_loss_coef=0.5,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        placement_entropy_coef=0.0005,
+        orientation_entropy_coef=0.01,
+        num_learning_epochs=5,
+        num_mini_batches=4,
+        learning_rate=1.0e-3,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.01,
+        max_grad_norm=1.0,
+    )
diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/joint_pos_camera_env_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/joint_pos_camera_env_cfg.py
index 5fdd1e9..a621e73 100644
--- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/joint_pos_camera_env_cfg.py
+++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/joint_pos_camera_env_cfg.py
@@ -7,6 +7,7 @@
 from isaaclab.utils import configclass
 from geodude.tasks.manager_based.pack.pack_camera_env_cfg import (
     PackDepthCameraEnvCfg,
+    PackDepthCameraObjLatentEnvCfg,
 )
 
 ##
@@ -26,6 +27,18 @@ def __post_init__(self):
         )  # asset name is not used in this env
 
 
+@configclass
+class NoArmPackCameraObjLatentEnvCfg(PackDepthCameraObjLatentEnvCfg):
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+
+        self.scene.robot = None
+        self.actions.packing_action = mdp.PackingActionCfg(
+            asset_name="tote1", place_obj_bottomLeft=True
+        )  # asset name is not used in this env
+
+
 @configclass
 class NoArmPackCameraEnvCfg_PLAY(NoArmPackCameraEnvCfg):
     def __post_init__(self):
diff --git a/source/geodude/geodude/tasks/manager_based/pack/mdp/rewards.py b/source/geodude/geodude/tasks/manager_based/pack/mdp/rewards.py
index 53bc27e..55ad86e 100644
--- a/source/geodude/geodude/tasks/manager_based/pack/mdp/rewards.py
+++ b/source/geodude/geodude/tasks/manager_based/pack/mdp/rewards.py
@@ -67,3 +67,8 @@ def orientation_command_error(env: ManagerBasedRLEnv, command_name: str, asset_c
     des_quat_w = quat_mul(asset.data.root_state_w[:, 3:7], des_quat_b)
     curr_quat_w = asset.data.body_state_w[:, asset_cfg.body_ids[0], 3:7]  # type: ignore
     return quat_error_magnitude(curr_quat_w, des_quat_w)
+
+
+def episode_bonus(env: ManagerBasedRLEnv) -> torch.Tensor:
+    """Constant per-environment bonus added at every step of the episode."""
+    return torch.ones(env.num_envs, device=env.device)
\ No newline at end of file
diff --git a/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py
index 963079b..160f166 100644
---
a/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py @@ -76,12 +76,31 @@ class PolicyCfg(ObsGroup): """Observations for policy group.""" # observation terms (order preserved) - # actions = ObsTerm(func=mdp.last_action) - obs_dims = ObsTerm(func=mdp.obs_dims) + # last_action = ObsTerm(func=mdp.last_action) + obs_lookahead = ObsTerm(func=mdp.obs_lookahead, params={"max_objects": 1}) - # def __post_init__(self): - # self.enable_corruption = True - # self.concatenate_terms = True + class SensorCfg(ObsGroup): + """Observations for sensor group.""" + + image = ObsTerm( + func=mdp.image, params={"sensor_cfg": SceneEntityCfg("tiled_camera"), "data_type": "distance_to_camera"} + ) + + # observation groups + policy: PolicyCfg = PolicyCfg() + sensor: SensorCfg = SensorCfg() + + +@configclass +class DepthObservationsObjLatentCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + obs_latents = ObsTerm(func=mdp.obs_latents) class SensorCfg(ObsGroup): """Observations for sensor group.""" @@ -149,6 +168,17 @@ def __post_init__(self): # remove ground as it obstructs the camera # self.scene.ground = None +class PackDepthCameraObjLatentEnvCfg(PackEnvCfg): + """Configuration for the packing environment with depth camera and object latents.""" + + scene: PackDepthCameraSceneCfg = PackDepthCameraSceneCfg(num_envs=512, env_spacing=2.5, replicate_physics=False) + observations: DepthObservationsObjLatentCfg = DepthObservationsObjLatentCfg() + + def __post_init__(self): + super().__post_init__() + # remove ground as it obstructs the camera + self.scene.ground = None + @configclass class PackResNet18DepthCameraEnvCfg(PackDepthCameraEnvCfg): diff --git a/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py index 11eabfa..0bca656 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py @@ -56,24 +56,24 @@ # Define which object IDs to include ycb_include_ids = [ "003", # cracker_box - # "004", # sugar_box - # "006", # mustard_bottle - # "007", # tuna_fish_can - # # "008", # pudding_box - # # "009", # gelatin_box - # # "010", # potted_meat_can - # "011", # banana - # # "024", # bowl - # # "025", # mug - # "036", # wood_block - # # "051", # large_clamp - # # "052", # extra_large_clamp - # # "061", # foam_brick + "004", # sugar_box + "006", # mustard_bottle + "007", # tuna_fish_can + # "008", # pudding_box + # "009", # gelatin_box + # "010", # potted_meat_can + "011", # banana + # "024", # bowl + # "025", # mug + "036", # wood_block + # "051", # large_clamp + # "052", # extra_large_clamp + # "061", # foam_brick ] lw_include_names = [ # "cracker_box", - # "bowl", + "bowl", ] # Filter USD files based on ID prefixes @@ -90,7 +90,7 @@ if base_name in lw_include_names: usd_paths.append(usd_file) -num_object_per_env = 20 +num_object_per_env = 70 # Spacing between totes tote_spacing = 0.43 # width of tote + gap between totes @@ -152,7 +152,7 @@ def __post_init__(self): kinematic_enabled=False, disable_gravity=False, # enable_gyroscopic_forces=True, - solver_position_iteration_count=4, + solver_position_iteration_count=10, solver_velocity_iteration_count=0, sleep_threshold=0.005, stabilization_threshold=0.0025, @@ -195,11 +195,10 @@ class 
PolicyCfg(ObsGroup): # observation terms (order preserved) # actions = ObsTerm(func=mdp.last_action) - obs_dims = ObsTerm(func=mdp.obs_dims) + obs_lookahead = ObsTerm(func=mdp.obs_lookahead, max_objects=1) def __post_init__(self): self.enable_corruption = True - # self.concatenate_terms = True class SensorCfg(ObsGroup): @@ -239,15 +238,13 @@ class EventCfg: @configclass class RewardsCfg: """Reward terms for the MDP.""" + gcu_reward = RewardTerm( + func=mdp.gcu_reward, weight=2700.0 + ) - # gcu_reward = RewardTerm( - # func=mdp.gcu_reward_step, weight=1000.0 - # ) - - object_shift = RewardTerm(func=mdp.object_shift, weight=10.0) - - wasted_volume = RewardTerm(func=mdp.inverse_wasted_volume, weight=40.0) + # object_shift = RewardTerm(func=mdp.object_shift, weight=10.0) + # wasted_volume = RewardTerm(func=mdp.inverse_wasted_volume, weight=40.0) @configclass class TerminationsCfg: @@ -286,7 +283,7 @@ class PackEnvCfg(ManagerBasedRLEnvCfg): """Configuration for the reach end-effector pose tracking environment.""" # Scene settings - scene: PackSceneCfg = PackSceneCfg(num_envs=512, env_spacing=2.5, replicate_physics=False) + scene: PackSceneCfg = PackSceneCfg(num_envs=512, env_spacing=2.5, replicate_physics=False, clone_in_fabric=True) # Basic settings observations: ObservationsCfg = ObservationsCfg() actions: ActionsCfg = ActionsCfg() @@ -310,4 +307,4 @@ def __post_init__(self): self.sim.physx.gpu_max_rigid_patch_count = 4096 * 4096 self.sim.physx.gpu_collision_stack_size = 4096 * 4096 * 20 self.sim.physx.gpu_found_lost_pairs_capacity = 4096 * 4096 * 20 - self.sim.physx.gpu_max_rigid_contact_count = 2**26 + self.sim.physx.gpu_max_rigid_contact_count = 2**26 \ No newline at end of file diff --git a/source/geodude/geodude/tasks/manager_based/pack/utils/tote_manager.py b/source/geodude/geodude/tasks/manager_based/pack/utils/tote_manager.py index 979a7f1..af396d4 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/utils/tote_manager.py +++ b/source/geodude/geodude/tasks/manager_based/pack/utils/tote_manager.py @@ -233,7 +233,18 @@ def get_object_bboxes_batch(self, env_ids, obj_indices): bboxes[i, j] = self.get_object_bbox(env_idx, obj_idx) return bboxes - def get_object_latents_batch(self, env_ids, obj_indices): + def set_object_latents(self, obj_latents, env_ids): + """ + Set object latents for specified environments. + + Args: + obj_latents: Tensor of object latents + env_ids: Environment IDs to update + """ + self.obj_latents[env_ids] = obj_latents + + + def set_object_voxels(self, obj_voxels): """ Get object latents for a batch of environments and objects. 
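Note on the hunk above: get_object_latents_batch is removed in favor of setters, so anything that still needs batched reads (for example the new obs_latents observation term) has to gather from self.obj_latents directly. A minimal sketch of such a getter, assuming obj_latents is laid out as (num_envs, num_objects, latent_dim); neither that layout nor this helper appears in the series, it is purely illustrative:

import torch

def get_object_latents_batch(self, env_ids: torch.Tensor, obj_indices: torch.Tensor) -> torch.Tensor:
    """Gather latents for a batch of (environment, object) pairs.

    env_ids: (B,) environment indices; obj_indices: (B, K) object indices.
    Returns a (B, K, latent_dim) tensor.
    """
    # Advanced indexing does in one shot what get_object_bboxes_batch
    # above does with a nested Python loop: the (B, 1) env index
    # broadcasts against the (B, K) object indices.
    return self.obj_latents[env_ids.unsqueeze(1), obj_indices]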
@@ -494,23 +505,23 @@ def eject_totes(self, tote_ids, env_ids, is_dest=True, overfill_check=True, heig self.tote_keys, ) - # # Log destination tote ejections - overfilled_totes = torch.zeros((self.num_envs, self.num_totes), dtype=torch.bool, device=self.env.device) - overfilled_totes[env_ids[overfilled_envs], tote_ids[overfilled_envs]] = True - overfilled_totes = overfilled_totes[env_ids] - outbound_gcus = self.get_gcu(env_ids) - if self.log_stats: - if is_dest: - # for env_idx, problem in zip(env_ids[overfilled_envs].tolist(), [self.env.unwrapped.bpp.problems[i.item()] for i in env_ids[overfilled_envs]]): - # print("logging dest tote for env_idx:", env_idx) - # self.env.unwrapped.bpp.update_container_heightmap( - # self.env, torch.tensor([env_idx], device=self.env.device), torch.zeros((self.num_envs), device=self.env.device).int() - # ) - # self.stats.log_container(env_idx, problem.container) - self.stats.log_dest_tote_ejection(tote_ids[overfilled_envs], env_ids[overfilled_envs]) - self.stats.log_tote_eject_gcus( - torch.zeros_like(outbound_gcus), outbound_gcus, totes_ejected=overfilled_totes - ) + # Log destination tote ejections + # overfilled_totes = torch.zeros((self.num_envs, self.num_totes), dtype=torch.bool, device=self.env.device) + # overfilled_totes[env_ids[overfilled_envs], tote_ids[overfilled_envs]] = True + # overfilled_totes = overfilled_totes[env_ids] + # outbound_gcus = self.get_gcu(env_ids) + # if self.log_stats: + # if is_dest: + # # for env_idx, problem in zip(env_ids[overfilled_envs].tolist(), [self.env.unwrapped.bpp.problems[i.item()] for i in env_ids[overfilled_envs]]): + # # print("logging dest tote for env_idx:", env_idx) + # # self.env.unwrapped.bpp.update_container_heightmap( + # # self.env, torch.tensor([env_idx], device=self.env.device), torch.zeros((self.num_envs), device=self.env.device).int() + # # ) + # # self.stats.log_container(env_idx, problem.container) + # self.stats.log_dest_tote_ejection(tote_ids[overfilled_envs], env_ids[overfilled_envs]) + # self.stats.log_tote_eject_gcus( + # torch.zeros_like(outbound_gcus), outbound_gcus, totes_ejected=overfilled_totes + # ) assets_to_eject = [] for env_id, tote_id in zip(env_ids[overfilled_envs], tote_ids[overfilled_envs]): From a94247c68dd5823ffb8fe94480708cc22366ffcf Mon Sep 17 00:00:00 2001 From: Henri Fung Date: Thu, 20 Nov 2025 13:44:33 -0800 Subject: [PATCH 2/5] Test Settings --- .../no_arm/agents/rsl_rl_ppo_camera_cfg.py | 20 +++--- .../rsl_rl_ppo_camera_obj_latent_cfg.py | 1 - .../tasks/manager_based/pack/mdp/events.py | 21 +++--- .../manager_based/pack/pack_camera_env_cfg.py | 2 - .../tasks/manager_based/pack/pack_env_cfg.py | 69 ++++++++++++------- 5 files changed, 64 insertions(+), 49 deletions(-) diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py index 01efa4e..5de41a8 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py @@ -5,33 +5,35 @@ from gculab_rl.rsl_rl import RslRlPpoActorCriticConv2dCfg from isaaclab.utils import configclass -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoAlgorithmCfg +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg +from gculab_rl.rsl_rl import RslRlGCUPpoAlgorithmCfg @configclass class NoArmPackPPOCameraRunnerCfg(RslRlOnPolicyRunnerCfg): - 
num_steps_per_env = 24 - max_iterations = 1500 + num_steps_per_env = 4 + max_iterations = 3000 save_interval = 10 experiment_name = "no_arm_pack" - empirical_normalization = True + empirical_normalization = False policy = RslRlPpoActorCriticConv2dCfg( - init_noise_std=40.0, + init_noise_std=1.5, actor_hidden_dims=[128, 128], critic_hidden_dims=[128, 128], activation="elu", conv_layers_params=[ {"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1}, - {"out_channels": 8, "kernel_size": 3, "stride": 2}, + # {"out_channels": 8, "kernel_size": 3, "stride": 2}, {"out_channels": 16, "kernel_size": 3, "stride": 2}, ], conv_linear_output_size=128, # Project 128×13×10 into 128-dim ) - algorithm = RslRlPpoAlgorithmCfg( + algorithm = RslRlGCUPpoAlgorithmCfg( value_loss_coef=0.5, use_clipped_value_loss=True, clip_param=0.2, - entropy_coef=0.0025, + placement_entropy_coef=0.0005, + orientation_entropy_coef=0.01, num_learning_epochs=5, num_mini_batches=4, learning_rate=1.0e-3, @@ -40,4 +42,4 @@ class NoArmPackPPOCameraRunnerCfg(RslRlOnPolicyRunnerCfg): lam=0.95, desired_kl=0.01, max_grad_norm=1.0, - ) + ) \ No newline at end of file diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py index 2decedd..7450d1a 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py @@ -22,7 +22,6 @@ class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg): activation="elu", conv_layers_params=[ {"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1}, - # {"out_channels": 8, "kernel_size": 3, "stride": 2}, {"out_channels": 16, "kernel_size": 3, "stride": 2}, ], conv_linear_output_size=128, # Project 128×13×10 into 256-dim diff --git a/source/geodude/geodude/tasks/manager_based/pack/mdp/events.py b/source/geodude/geodude/tasks/manager_based/pack/mdp/events.py index 11a2dab..020f380 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/mdp/events.py +++ b/source/geodude/geodude/tasks/manager_based/pack/mdp/events.py @@ -244,9 +244,7 @@ def load_latents(asset_path): # Compute properties only once per unique asset path if asset_path not in mesh_properties_cache: - print("asset_path", asset_path) bbox = compute_mesh_bbox(mesh) * scale - print("bbox: ", bbox) vox = compute_voxelized_geometry_usd(mesh, bbox, scale=scale) volume = mesh_volume(mesh) * (scale**3) latents = load_latents(asset_path) @@ -731,7 +729,7 @@ def inverse_wasted_volume(env: ManagerBasedRLGCUEnv, gamma=0.99): return inverse_wasted_volume -def wasted_volume_pbrs(env: ManagerBasedRLGCUEnv, gamma=0.99): +def wasted_volume_pbrs(env: ManagerBasedRLGCUEnv, gamma = 0.99): """ Computes the wasted volume in the tote, defined as 1 - (% top down volume - GCU of objects). 1 - (% top down volume - GCU of objects). 
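A note before the next hunk: wasted_volume_pbrs implements potential-based reward shaping (Ng et al., 1999), where the reward is the discounted difference of a potential between consecutive states, F(s, s') = gamma * phi(s') - phi(s), which leaves the optimal policy unchanged. A minimal sketch of the pattern as rearranged below, with phi standing in for the objects_volume / top_down_volumes ratio; the helper itself is illustrative, not part of the patch:

import torch

def pbrs_step(
    phi_next: torch.Tensor, last_phi: torch.Tensor, reset_mask: torch.Tensor, gamma: float = 0.99
) -> tuple[torch.Tensor, torch.Tensor]:
    """One shaping step: F(s, s') = gamma * phi(s') - phi(s)."""
    # Zero the potential of freshly reset envs before differencing,
    # mirroring the reordering the hunk below makes, so the stored
    # potential starts each new episode at zero.
    phi_next = torch.where(reset_mask, torch.zeros_like(phi_next), phi_next)
    shaping = gamma * phi_next - last_phi
    # The caller persists phi_next as the new last_phi for the next
    # step (env.tote_manager.last_pbrs in the patch).
    return shaping, phi_next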
@@ -750,18 +748,19 @@ def wasted_volume_pbrs(env: ManagerBasedRLGCUEnv, gamma=0.99): top_down_volumes = torch.sum(top_down_volumes_, dim=(1, 2)) # Sum over heightmap dimensions top_down_volumes = (top_down_volumes / total_volume).squeeze(1) - objects_volume = env.tote_manager.stats.recent_gcu_values[ - torch.arange(env.num_envs, device=env.device), env.tote_manager.dest_totes - ] - inverse_wasted_volume = objects_volume / (top_down_volumes + 1e-6) - pbrs = gamma * inverse_wasted_volume - last_pbrs - env.tote_manager.last_pbrs = inverse_wasted_volume + objects_volume = ( + env.tote_manager.stats.recent_gcu_values[ + torch.arange(env.num_envs, device=env.device), env.tote_manager.dest_totes + ] + ) + inverse_wasted_volume = objects_volume / (top_down_volumes + 1e-9) if env.tote_manager.reset_pbrs.any(): - env.tote_manager.last_pbrs[env.tote_manager.reset_pbrs] = 0 + inverse_wasted_volume[env.tote_manager.reset_pbrs] = 0 env.tote_manager.reset_pbrs[env.tote_manager.reset_pbrs] = False + pbrs = gamma * inverse_wasted_volume - last_pbrs + env.tote_manager.last_pbrs = inverse_wasted_volume return pbrs - def object_overfilled_tote(env: ManagerBasedRLGCUEnv): """Checks if any object is overfilled the tote. Args: diff --git a/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py index 160f166..05c238e 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/pack_camera_env_cfg.py @@ -74,9 +74,7 @@ class DepthObservationsCfg: @configclass class PolicyCfg(ObsGroup): """Observations for policy group.""" - # observation terms (order preserved) - # last_action = ObsTerm(func=mdp.last_action) obs_lookahead = ObsTerm(func=mdp.obs_lookahead, params={"max_objects": 1}) class SensorCfg(ObsGroup): diff --git a/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py index 0bca656..37a4963 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/pack_env_cfg.py @@ -20,6 +20,7 @@ from isaaclab.managers import ObservationGroupCfg as ObsGroup from isaaclab.managers import ObservationTermCfg as ObsTerm from isaaclab.managers import RewardTermCfg as RewardTerm +from isaaclab.managers import CurriculumTermCfg as CurriculumTerm from isaaclab.managers import SceneEntityCfg from isaaclab.managers import TerminationTermCfg as DoneTerm from isaaclab.scene import InteractiveSceneCfg @@ -55,25 +56,41 @@ # Define which object IDs to include ycb_include_ids = [ - "003", # cracker_box - "004", # sugar_box - "006", # mustard_bottle - "007", # tuna_fish_can - # "008", # pudding_box - # "009", # gelatin_box - # "010", # potted_meat_can - "011", # banana - # "024", # bowl - # "025", # mug - "036", # wood_block - # "051", # large_clamp - # "052", # extra_large_clamp - # "061", # foam_brick + # "003", # cracker_box + # "004", # sugar_box + # "006", # mustard_bottle + # "007", # tuna_fish_can + # # "008", # pudding_box + # # "009", # gelatin_box + # # "010", # potted_meat_can + # "011", # banana + # # "024", # bowl + # # "025", # mug + # "036", # wood_block + # # "051", # large_clamp + # # "052", # extra_large_clamp + # # "061", # foam_brick ] lw_include_names = [ - # "cracker_box", + "cracker_box", + "banana", "bowl", + "sugar_box", + "mustard_bottle", + "tuna_fish_can", + "wood_block", + "tomato_soup_can", + 
"tennis_ball", + "rubiks_cube", + # "Rope" + "pudding_box", + "potted_meat_can", + # "plate", + "mug", + # "mini_soccer_ball", + "master_chef_can", + "chips_can_berkeley_meshes", ] # Filter USD files based on ID prefixes @@ -90,7 +107,7 @@ if base_name in lw_include_names: usd_paths.append(usd_file) -num_object_per_env = 70 +num_object_per_env = 80 # Spacing between totes tote_spacing = 0.43 # width of tote + gap between totes @@ -152,8 +169,8 @@ def __post_init__(self): kinematic_enabled=False, disable_gravity=False, # enable_gyroscopic_forces=True, - solver_position_iteration_count=10, - solver_velocity_iteration_count=0, + # solver_position_iteration_count=10, + # solver_velocity_iteration_count=0, sleep_threshold=0.005, stabilization_threshold=0.0025, # max_depenetration_velocity=1000.0, @@ -195,10 +212,12 @@ class PolicyCfg(ObsGroup): # observation terms (order preserved) # actions = ObsTerm(func=mdp.last_action) - obs_lookahead = ObsTerm(func=mdp.obs_lookahead, max_objects=1) + obs_dims = ObsTerm(func=mdp.obs_dims) + # obs_lookahead = ObsTerm(func=mdp.obs_lookahead) def __post_init__(self): self.enable_corruption = True + # self.concatenate_terms = True class SensorCfg(ObsGroup): @@ -239,12 +258,14 @@ class EventCfg: class RewardsCfg: """Reward terms for the MDP.""" gcu_reward = RewardTerm( - func=mdp.gcu_reward, weight=2700.0 + func=mdp.gcu_reward, weight=500.0 ) # object_shift = RewardTerm(func=mdp.object_shift, weight=10.0) - # wasted_volume = RewardTerm(func=mdp.inverse_wasted_volume, weight=40.0) + episode_bonus = RewardTerm(func=mdp.episode_bonus, weight=10.0) + unused_phi_s = RewardTerm(func=mdp.inverse_wasted_volume, weight=0.0) # For logging only + potential_function_F = RewardTerm(func=mdp.wasted_volume_pbrs, weight=10.0) @configclass class TerminationsCfg: @@ -258,11 +279,7 @@ class TerminationsCfg: @configclass class CurriculumCfg: """Curriculum terms for the MDP.""" - pass - # object_shift = CurriculumTerm( - # func=mdp.modify_reward_weight, params={"term_name": "object_shift", "weight": 50.0, "num_steps": 10000} - # ) @configclass @@ -303,7 +320,7 @@ def __post_init__(self): self.episode_length_s = 10.0 self.viewer.eye = (0, 0.1, 5.5) # simulation settings - self.sim.dt = 1.0 / 90.0 + self.sim.dt = 1.0 / 60.0 self.sim.physx.gpu_max_rigid_patch_count = 4096 * 4096 self.sim.physx.gpu_collision_stack_size = 4096 * 4096 * 20 self.sim.physx.gpu_found_lost_pairs_capacity = 4096 * 4096 * 20 From 90d9fec986f51807cbd0b8cd1a8483a4bbfb0e91 Mon Sep 17 00:00:00 2001 From: Henri Fung Date: Sun, 23 Nov 2025 12:27:49 -0800 Subject: [PATCH 3/5] Fixed gcu to use fifo --- rsl_rl | 2 +- .../gculab_rl/rsl_rl/gcu_vecenv_wrapper.py | 23 ++++++++++++++----- .../no_arm/agents/rsl_rl_ppo_camera_cfg.py | 2 +- .../rsl_rl_ppo_camera_obj_latent_cfg.py | 9 ++++---- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/rsl_rl b/rsl_rl index 52c5bd0..9ca962b 160000 --- a/rsl_rl +++ b/rsl_rl @@ -1 +1 @@ -Subproject commit 52c5bd047aebf194b8a7784669678edc0e53457c +Subproject commit 9ca962baf942152a7bf6de285c833df654e17dda diff --git a/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py b/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py index bfd4bef..08cc755 100644 --- a/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py +++ b/source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py @@ -129,11 +129,14 @@ def _convert_to_pos_quat(self, actions: torch.Tensor, object_to_pack: list) -> t self.env.unwrapped.tote_manager.get_object_bbox(env_idx, obj_idx) for env_idx, obj_idx in zip( 
torch.arange(actions.shape[0], device=self.env.unwrapped.device), - object_to_pack, + object_to_pack ) ]) - - rotated_dim = calculate_rotated_bounding_box(bbox_offset, quats, device=self.env.unwrapped.device) + rotated_dim = ( + calculate_rotated_bounding_box( + bbox_offset, quats, device=self.env.unwrapped.device + ) + ) x_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0] y_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1] x = torch.sigmoid(5 * x) * (self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0]) @@ -206,9 +209,17 @@ def step( torch.arange(self.env.unwrapped.num_envs, device=self.env.unwrapped.device), tote_ids, )[0] - object_to_pack = [row[0] for row in packable_objects] + + # Update FIFO queues with new packable objects + self.env.unwrapped.bpp.update_fifo_queues(packable_objects) + + # Select objects using FIFO (First In, First Out) ordering + object_to_pack = self.env.unwrapped.bpp.select_fifo_packable_objects(packable_objects, self.env.unwrapped.device) + # Remove the selected object from the front of the queue + self.env.unwrapped.bpp.remove_selected_from_fifo(object_to_pack) + for i in range(self.env.unwrapped.num_envs): - self.unwrapped.bpp.packed_obj_idx[i].append(torch.tensor([object_to_pack[i].item()])) + self.unwrapped.bpp.packed_obj_idx[i].append(torch.tensor([object_to_pack[i].item()], device=self.env.unwrapped.device)) actions, xy_pos_range, rotated_dim = self._convert_to_pos_quat(actions, object_to_pack) @@ -217,7 +228,7 @@ def step( actions = torch.cat( [ tote_ids.unsqueeze(1).to(self.env.unwrapped.device), # Destination tote IDs - torch.tensor(object_to_pack, device=self.env.unwrapped.device).unsqueeze(1), # Object indices + object_to_pack.unsqueeze(1), # Object indices actions, ], dim=1, diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py index 5de41a8..e90a7c4 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_cfg.py @@ -15,7 +15,7 @@ class NoArmPackPPOCameraRunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 3000 save_interval = 10 experiment_name = "no_arm_pack" - empirical_normalization = False + empirical_normalization = True policy = RslRlPpoActorCriticConv2dCfg( init_noise_std=1.5, actor_hidden_dims=[128, 128], diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py index 7450d1a..b4683e1 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py @@ -22,9 +22,10 @@ class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg): activation="elu", conv_layers_params=[ {"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1}, + # {"out_channels": 8, "kernel_size": 3, "stride": 2}, {"out_channels": 16, "kernel_size": 3, "stride": 2}, ], - conv_linear_output_size=128, # Project 128×13×10 into 256-dim + conv_linear_output_size=128, # Project 128×13×10 into 128-dim pointnet_layers_params=[ {"out_channels": 64}, {"out_channels": 
256}, @@ -36,14 +37,14 @@ class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg): value_loss_coef=0.5, use_clipped_value_loss=True, clip_param=0.2, - placement_entropy_coef=0.0005, - orientation_entropy_coef=0.01, + placement_entropy_coef=0.0, + orientation_entropy_coef=0.005, num_learning_epochs=5, num_mini_batches=4, learning_rate=1.0e-3, schedule="adaptive", gamma=0.99, lam=0.95, - desired_kl=0.01, + desired_kl=0.03, max_grad_norm=1.0, ) From 72f5932a4127bc65a5b63c24a4c796806dc52457 Mon Sep 17 00:00:00 2001 From: Henri Fung Date: Sun, 23 Nov 2025 13:37:20 -0800 Subject: [PATCH 4/5] Fix exporter to use gculab's --- scripts/rsl_rl/play.py | 4 +++- source/gculab_rl/gculab_rl/rsl_rl/__init__.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/rsl_rl/play.py b/scripts/rsl_rl/play.py index 1ea7d0b..eab160d 100644 --- a/scripts/rsl_rl/play.py +++ b/scripts/rsl_rl/play.py @@ -61,12 +61,14 @@ import geodude.tasks # noqa: F401 from gculab_rl.rsl_rl import ( RslRlGCUVecEnvWrapper, + export_policy_as_jit, + export_policy_as_onnx, ) from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent from isaaclab.utils.assets import retrieve_file_path from isaaclab.utils.dict import print_dict from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper from isaaclab_tasks.utils import get_checkpoint_path, parse_env_cfg from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner, GCUOnPolicyConv2dPointNetRunner diff --git a/source/gculab_rl/gculab_rl/rsl_rl/__init__.py b/source/gculab_rl/gculab_rl/rsl_rl/__init__.py index 4b09a40..be0af8a 100644 --- a/source/gculab_rl/gculab_rl/rsl_rl/__init__.py +++ b/source/gculab_rl/gculab_rl/rsl_rl/__init__.py @@ -17,3 +17,4 @@ from .gcu_vecenv_wrapper import RslRlGCUVecEnvWrapper from .rl_cfg import RslRlPpoActorCriticConv2dCfg, RslRlGCUPpoAlgorithmCfg, RslRlPpoActorCriticConv2dPointNetCfg +from .exporter import export_policy_as_jit, export_policy_as_onnx \ No newline at end of file From 8f1f25fd52403bd8bb086aded778ac734ff751ba Mon Sep 17 00:00:00 2001 From: Henri Fung Date: Tue, 25 Nov 2025 20:57:51 -0800 Subject: [PATCH 5/5] Adjust settings --- .../config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py index b4683e1..9faecfd 100644 --- a/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py +++ b/source/geodude/geodude/tasks/manager_based/pack/config/no_arm/agents/rsl_rl_ppo_camera_obj_latent_cfg.py @@ -38,13 +38,13 @@ class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg): use_clipped_value_loss=True, clip_param=0.2, placement_entropy_coef=0.0, - orientation_entropy_coef=0.005, + orientation_entropy_coef=0.0, num_learning_epochs=5, num_mini_batches=4, learning_rate=1.0e-3, schedule="adaptive", gamma=0.99, lam=0.95, - desired_kl=0.03, + desired_kl=0.005, max_grad_norm=1.0, )
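A closing note on patch 3: the wrapper now defers object selection to three BPP-side helpers (update_fifo_queues, select_fifo_packable_objects, remove_selected_from_fifo) instead of always taking row[0] of the packable list, so queued objects are packed in arrival order. Those helpers live in the rsl_rl submodule bump and are not shown in this series; a minimal sketch of the first-in-first-out bookkeeping they imply, with the container types and all internals assumed:

from collections import deque

import torch


class FifoSelector:
    """Per-environment FIFO queues over packable object indices."""

    def __init__(self, num_envs: int):
        self.queues = [deque() for _ in range(num_envs)]

    def update_fifo_queues(self, packable_objects: list[list[int]]) -> None:
        # Enqueue newly packable objects in arrival order, skipping
        # indices that are already queued.
        for env_id, objs in enumerate(packable_objects):
            for obj in objs:
                if obj not in self.queues[env_id]:
                    self.queues[env_id].append(obj)

    def select_fifo_packable_objects(self, packable_objects: list[list[int]], device: str) -> torch.Tensor:
        # Pick the oldest queued object that is still packable in each
        # env; update_fifo_queues has already run, so one always exists.
        picks = [
            next(obj for obj in self.queues[env_id] if obj in objs)
            for env_id, objs in enumerate(packable_objects)
        ]
        return torch.tensor(picks, device=device)

    def remove_selected_from_fifo(self, selected: torch.Tensor) -> None:
        # Pop each env's chosen object so the next call moves on.
        for env_id, obj in enumerate(selected.tolist()):
            self.queues[env_id].remove(obj)

This matches the call order in gcu_vecenv_wrapper.py: update, select, then remove, with the selected tensor also appended (now created on the right device) to bpp.packed_obj_idx.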