2 changes: 1 addition & 1 deletion gcu_objects
20 changes: 11 additions & 9 deletions scripts/rsl_rl/play.py
@@ -71,7 +71,7 @@
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper
from isaaclab_tasks.utils import get_checkpoint_path, parse_env_cfg

from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner
from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner, GCUOnPolicyConv2dPointNetRunner
from rsl_rl.utils import normalize_and_flatten_image_obs


@@ -122,7 +122,7 @@ def main():
env = gym.wrappers.RecordVideo(env, **video_kwargs)

# wrap around environment for rsl-rl
if agent_cfg.policy.class_name == "ActorCriticConv2d":
if agent_cfg.policy.class_name == "ActorCriticConv2d" or agent_cfg.policy.class_name == "ActorCriticConv2dPointNet":
env = RslRlGCUVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
else:
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
@@ -131,6 +131,8 @@ def main():
# load previously trained model
if agent_cfg.policy.class_name == "ActorCriticConv2d":
ppo_runner = GCUOnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
elif agent_cfg.policy.class_name == "ActorCriticConv2dPointNet":
ppo_runner = GCUOnPolicyConv2dPointNetRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
else:
ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
ppo_runner.load(resume_path)
@@ -191,13 +193,13 @@ def main():
print(
"GCU ", env.unwrapped.tote_manager.get_gcu(torch.arange(args_cli.num_envs, device=env.unwrapped.device))
)
# print("\n===== Ejection Summary =====")
# print(f"Total steps: {stats['total_steps']}")
# if ejection_summary != {}:
# for i in range(len(ejection_summary.keys())):
# env_id = list(ejection_summary.keys())[i]
# print(ejection_summary[env_id])
# print("==========================\n")
print("\n===== Ejection Summary =====")
print(f"Total steps: {stats['total_steps']}")
if ejection_summary != {}:
for i in range(len(ejection_summary.keys())):
env_id = list(ejection_summary.keys())[i]
print(ejection_summary[env_id])
print("==========================\n")
# env.unwrapped.bpp.update_container_heightmap(env, torch.arange(args_cli.num_envs).to(env.unwrapped.device), torch.zeros(args_cli.num_envs, device=env.unwrapped.device).int())
# env stepping
obs, _, _, infos = env.step(actions, image_obs=image_obs)
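For reference, the re-enabled summary loop above walks ejection_summary.keys() by index; an equivalent, more direct form simply iterates the dict. A standalone sketch, with made-up stand-ins for the script's stats and ejection_summary:

    # Hypothetical stand-ins for the script's locals, so the sketch runs on its own.
    stats = {"total_steps": 120}
    ejection_summary = {0: "env 0: 3 ejections", 1: "env 1: 1 ejection"}

    print("\n===== Ejection Summary =====")
    print(f"Total steps: {stats['total_steps']}")
    for env_id, summary in ejection_summary.items():
        # same output as looking up ejection_summary[env_id] by key position
        print(summary)
    print("==========================\n")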
6 changes: 4 additions & 2 deletions scripts/rsl_rl/train.py
@@ -95,7 +95,7 @@
from isaaclab_tasks.utils import get_checkpoint_path
from isaaclab_tasks.utils.hydra import hydra_task_config

from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner
from rsl_rl.runners import GCUOnPolicyRunner, OnPolicyRunner, GCUOnPolicyConv2dPointNetRunner

torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
@@ -164,14 +164,16 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
env = gym.wrappers.RecordVideo(env, **video_kwargs)

# wrap around environment for rsl-rl
if agent_cfg.policy.class_name == "ActorCriticConv2d":
if agent_cfg.policy.class_name == "ActorCriticConv2d" or agent_cfg.policy.class_name == "ActorCriticConv2dPointNet":
env = RslRlGCUVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
else:
env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)

# create runner from rsl-rl
if agent_cfg.policy.class_name == "ActorCriticConv2d":
runner = GCUOnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
elif agent_cfg.policy.class_name == "ActorCriticConv2dPointNet":
runner = GCUOnPolicyConv2dPointNetRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
else:
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
# write git state to logs
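Both play.py and train.py pick the environment wrapper and runner from agent_cfg.policy.class_name. A compact sketch of the same dispatch as a lookup table (the make_runner helper is hypothetical, not part of either script):

    from rsl_rl.runners import GCUOnPolicyConv2dPointNetRunner, GCUOnPolicyRunner, OnPolicyRunner

    # Policy class name -> runner class, mirroring the if/elif/else in both scripts.
    RUNNER_BY_POLICY = {
        "ActorCriticConv2d": GCUOnPolicyRunner,
        "ActorCriticConv2dPointNet": GCUOnPolicyConv2dPointNetRunner,
    }

    def make_runner(env, agent_cfg, log_dir, device):
        # Any other policy class falls back to the stock on-policy runner.
        runner_cls = RUNNER_BY_POLICY.get(agent_cfg.policy.class_name, OnPolicyRunner)
        return runner_cls(env, agent_cfg.to_dict(), log_dir=log_dir, device=device)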
12 changes: 6 additions & 6 deletions source/gculab/gculab/envs/manager_based_rl_gcu_env.py
@@ -220,12 +220,12 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
for i in range(wait_time):
self.scene.write_data_to_sim()
self.sim.step(render=False)
if (
self._sim_step_counter % self.cfg.sim.render_interval == 0
and is_rendering
and self.tote_manager.animate
):
self.sim.render()
# if (
# self._sim_step_counter % self.cfg.sim.render_interval == 0
# and is_rendering
# and self.tote_manager.animate
# ):
# self.sim.render()
# update buffers at sim dt - only on last iteration to reduce GPU interface calls
if i == wait_time - 1:
self.scene.update(dt=self.physics_dt)
3 changes: 2 additions & 1 deletion source/gculab_rl/gculab_rl/rsl_rl/__init__.py
@@ -16,4 +16,5 @@
"""

from .gcu_vecenv_wrapper import RslRlGCUVecEnvWrapper
from .rl_cfg import RslRlPpoActorCriticConv2dCfg
from .rl_cfg import RslRlPpoActorCriticConv2dCfg, RslRlGCUPpoAlgorithmCfg, RslRlPpoActorCriticConv2dPointNetCfg
from .exporter import export_policy_as_jit, export_policy_as_onnx
27 changes: 19 additions & 8 deletions source/gculab_rl/gculab_rl/rsl_rl/gcu_vecenv_wrapper.py
@@ -129,15 +129,18 @@ def _convert_to_pos_quat(self, actions: torch.Tensor, object_to_pack: list) -> t
self.env.unwrapped.tote_manager.get_object_bbox(env_idx, obj_idx)
for env_idx, obj_idx in zip(
torch.arange(actions.shape[0], device=self.env.unwrapped.device),
object_to_pack,
object_to_pack
)
])

rotated_dim = calculate_rotated_bounding_box(bbox_offset, quats, device=self.env.unwrapped.device)
rotated_dim = (
calculate_rotated_bounding_box(
bbox_offset, quats, device=self.env.unwrapped.device
)
)
x_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0]
y_pos_range = self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1]
x = torch.sigmoid(x) * (self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0])
y = torch.sigmoid(y) * (self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1])
x = torch.sigmoid(5 * x) * (self.env.unwrapped.tote_manager.true_tote_dim[0] / 100 - rotated_dim[:, 0])
y = torch.sigmoid(5 * y) * (self.env.unwrapped.tote_manager.true_tote_dim[1] / 100 - rotated_dim[:, 1])

# Compute z analytically for each sample in the batch using multiprocessing
z = torch.zeros_like(x)
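The x/y placement targets above are squashed into the feasible range (tote dimension minus the rotated bounding box) with a sigmoid, and this change raises the gain from 1 to 5 so moderate raw actions can already reach the tote walls. A standalone sketch of the mapping with a made-up feasible range:

    import torch

    def squash_to_range(raw: torch.Tensor, feasible_range: torch.Tensor, gain: float = 5.0) -> torch.Tensor:
        """Map an unbounded raw action into (0, feasible_range) via a sigmoid with the given gain."""
        return torch.sigmoid(gain * raw) * feasible_range

    feasible = torch.tensor([0.40])  # e.g. true_tote_dim / 100 - rotated_dim, in metres (illustrative value)
    for a in (-1.0, 0.0, 1.0):
        x = squash_to_range(torch.tensor([a]), feasible)
        print(f"raw={a:+.1f} -> x={x.item():.3f} m")
    # gain 5: raw = +/-1 maps to ~0.7% / ~99.3% of the range (sigmoid(+/-5));
    # gain 1: the same raw actions only reach ~27% / ~73% of the range.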
@@ -206,9 +209,17 @@ def step(
torch.arange(self.env.unwrapped.num_envs, device=self.env.unwrapped.device),
tote_ids,
)[0]
object_to_pack = [row[0] for row in packable_objects]

# Update FIFO queues with new packable objects
self.env.unwrapped.bpp.update_fifo_queues(packable_objects)

# Select objects using FIFO (First In, First Out) ordering
object_to_pack = self.env.unwrapped.bpp.select_fifo_packable_objects(packable_objects, self.env.unwrapped.device)
# Remove the selected object from the front of the queue
self.env.unwrapped.bpp.remove_selected_from_fifo(object_to_pack)

for i in range(self.env.unwrapped.num_envs):
self.unwrapped.bpp.packed_obj_idx[i].append(torch.tensor([object_to_pack[i].item()]))
self.unwrapped.bpp.packed_obj_idx[i].append(torch.tensor([object_to_pack[i].item()], device=self.env.unwrapped.device))

actions, xy_pos_range, rotated_dim = self._convert_to_pos_quat(actions, object_to_pack)

@@ -217,7 +228,7 @@
actions = torch.cat(
[
tote_ids.unsqueeze(1).to(self.env.unwrapped.device), # Destination tote IDs
torch.tensor(object_to_pack, device=self.env.unwrapped.device).unsqueeze(1), # Object indices
object_to_pack.unsqueeze(1), # Object indices
actions,
],
dim=1,
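The bpp.update_fifo_queues, select_fifo_packable_objects, and remove_selected_from_fifo helpers are not shown in this diff; the sketch below only illustrates the first-in-first-out semantics they suggest (one queue per environment, new packable objects appended, the oldest one selected and then popped). Names, signatures, and shapes here are assumptions.

    import torch
    from collections import deque

    class FifoSelectorSketch:
        """Per-environment FIFO over packable object indices (illustrative only)."""

        def __init__(self, num_envs: int):
            self.queues = [deque() for _ in range(num_envs)]

        def update(self, packable_objects):
            # packable_objects[i]: iterable of object indices currently packable in env i.
            for env_id, objs in enumerate(packable_objects):
                for obj in objs:
                    if obj not in self.queues[env_id]:
                        self.queues[env_id].append(int(obj))

        def select(self, device):
            # Oldest still-queued object per environment.
            return torch.tensor([q[0] for q in self.queues], device=device)

        def remove_selected(self):
            for q in self.queues:
                q.popleft()

    selector = FifoSelectorSketch(num_envs=2)
    selector.update([[3, 5], [7]])
    picked = selector.select(device="cpu")  # tensor([3, 7])
    selector.remove_selected()              # 3 and 7 leave the queues; 5 stays queued in env 0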
87 changes: 87 additions & 0 deletions source/gculab_rl/gculab_rl/rsl_rl/rl_cfg.py
@@ -8,7 +8,10 @@

from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlPpoActorCriticCfg
from isaaclab.utils import configclass

from isaaclab_rl.rsl_rl.rnd_cfg import RslRlRndCfg
from isaaclab_rl.rsl_rl.symmetry_cfg import RslRlSymmetryCfg

@configclass
class RslRlPpoActorCriticConv2dCfg(RslRlPpoActorCriticCfg):
@@ -26,3 +29,87 @@ class RslRlPpoActorCriticConv2dCfg(RslRlPpoActorCriticCfg):

conv_linear_output_size: int = 16
"""Output size of the linear layer after the convolutional features are flattened."""

@configclass
class RslRlPpoActorCriticConv2dPointNetCfg(RslRlPpoActorCriticConv2dCfg):
"""Configuration for the PPO actor-critic networks with convolutional layers and PointNet."""

class_name: str = "ActorCriticConv2dPointNet"
"""The policy class name. Default is ActorCriticConv2dPointNet."""

pointnet_layers_params: list[dict] = [
{"out_channels": 64},
{"out_channels": 256},
]
"""List of PointNet layer parameters."""
pointnet_in_dim: int = 8
"""Input dimension for the PointNet."""
pointnet_num_points: int = 512
"""Number of points for the PointNet."""

############################
# Algorithm configurations #
############################


@configclass
class RslRlGCUPpoAlgorithmCfg:
"""Configuration for the PPO algorithm."""

class_name: str = "PPO"
"""The algorithm class name. Default is PPO."""

num_learning_epochs: int = MISSING
"""The number of learning epochs per update."""

num_mini_batches: int = MISSING
"""The number of mini-batches per update."""

learning_rate: float = MISSING
"""The learning rate for the policy."""

schedule: str = MISSING
"""The learning rate schedule."""

gamma: float = MISSING
"""The discount factor."""

lam: float = MISSING
"""The lambda parameter for Generalized Advantage Estimation (GAE)."""

placement_entropy_coef: float = MISSING
"""The coefficient for the placement_entropy_coef loss."""

orientation_entropy_coef: float = MISSING
"""The coefficient for the orientation_entropy_coef loss."""

desired_kl: float = MISSING
"""The desired KL divergence."""

max_grad_norm: float = MISSING
"""The maximum gradient norm."""

value_loss_coef: float = MISSING
"""The coefficient for the value loss."""

use_clipped_value_loss: bool = MISSING
"""Whether to use clipped value loss."""

clip_param: float = MISSING
"""The clipping parameter for the policy."""

normalize_advantage_per_mini_batch: bool = False
"""Whether to normalize the advantage per mini-batch. Default is False.

If True, the advantage is normalized over the mini-batches only.
Otherwise, the advantage is normalized over the entire collected trajectories.
"""

symmetry_cfg: RslRlSymmetryCfg | None = None
"""The symmetry configuration. Default is None, in which case symmetry is not used."""

rnd_cfg: RslRlRndCfg | None = None
"""The configuration for the Random Network Distillation (RND) module. Default is None,
in which case RND is not used.
"""

@@ -30,3 +30,13 @@
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_camera_cfg:NoArmPackPPOCameraRunnerCfg",
},
)

gym.register(
id="Isaac-Pack-NoArm-Camera-Obj-Latent-v0",
entry_point="gculab.envs:ManagerBasedRLGCUEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.joint_pos_camera_env_cfg:NoArmPackCameraObjLatentEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_camera_obj_latent_cfg:NoArmPackPPOCameraObjLatentRunnerCfg",
},
)
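The two kwargs are "module:attribute" entry-point strings that the launch scripts resolve into config classes through Isaac Lab's registry utilities. A minimal sketch of that resolution pattern, using a standard-library target so it runs on its own (resolve_entry_point is a hypothetical helper, not the Isaac Lab API):

    import importlib

    def resolve_entry_point(entry_point: str):
        """Turn a 'package.module:AttrName' string into the referenced object."""
        module_name, attr_name = entry_point.split(":")
        return getattr(importlib.import_module(module_name), attr_name)

    # Standard-library example so the sketch is self-contained:
    print(resolve_entry_point("collections:OrderedDict"))  # <class 'collections.OrderedDict'>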
@@ -5,33 +5,35 @@

from gculab_rl.rsl_rl import RslRlPpoActorCriticConv2dCfg
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoAlgorithmCfg
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg
from gculab_rl.rsl_rl import RslRlGCUPpoAlgorithmCfg


@configclass
class NoArmPackPPOCameraRunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 24
max_iterations = 1500
num_steps_per_env = 4
max_iterations = 3000
save_interval = 10
experiment_name = "no_arm_pack"
empirical_normalization = True
policy = RslRlPpoActorCriticConv2dCfg(
init_noise_std=40.0,
init_noise_std=1.5,
actor_hidden_dims=[128, 128],
critic_hidden_dims=[128, 128],
activation="elu",
conv_layers_params=[
{"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1},
{"out_channels": 8, "kernel_size": 3, "stride": 2},
# {"out_channels": 8, "kernel_size": 3, "stride": 2},
{"out_channels": 16, "kernel_size": 3, "stride": 2},
],
conv_linear_output_size=128, # Project 128×13×10 into 128-dim
)
algorithm = RslRlPpoAlgorithmCfg(
algorithm = RslRlGCUPpoAlgorithmCfg(
value_loss_coef=0.5,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.0025,
placement_entropy_coef=0.0005,
orientation_entropy_coef=0.01,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
@@ -40,4 +42,4 @@ class NoArmPackPPOCameraRunnerCfg(RslRlOnPolicyRunnerCfg):
lam=0.95,
desired_kl=0.01,
max_grad_norm=1.0,
)
)
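placement_entropy_coef and orientation_entropy_coef take the place of the single entropy_coef in the stock PPO config. The GCU PPO algorithm itself is not part of this diff; as a hedged sketch under that assumption, two separate entropy bonuses would typically enter the loss like this:

    import torch

    def gcu_ppo_loss_sketch(surrogate_loss, value_loss, placement_entropy, orientation_entropy,
                            value_loss_coef=0.5, placement_entropy_coef=0.0005,
                            orientation_entropy_coef=0.01):
        """Illustrative combination only; the real loss lives in the rsl_rl GCU PPO implementation."""
        return (surrogate_loss
                + value_loss_coef * value_loss
                - placement_entropy_coef * placement_entropy.mean()
                - orientation_entropy_coef * orientation_entropy.mean())

    # Toy tensors, just to exercise the signature.
    loss = gcu_ppo_loss_sketch(torch.tensor(0.10), torch.tensor(0.20),
                               torch.rand(8), torch.rand(8))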
@@ -0,0 +1,50 @@
# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

from gculab_rl.rsl_rl import RslRlPpoActorCriticConv2dPointNetCfg
from isaaclab.utils import configclass
from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoAlgorithmCfg
from gculab_rl.rsl_rl import RslRlGCUPpoAlgorithmCfg

@configclass
class NoArmPackPPOCameraObjLatentRunnerCfg(RslRlOnPolicyRunnerCfg):
num_steps_per_env = 4
max_iterations = 3000
save_interval = 10
experiment_name = "no_arm_pack"
empirical_normalization = True
policy = RslRlPpoActorCriticConv2dPointNetCfg(
init_noise_std=1.5,
actor_hidden_dims=[128, 128],
critic_hidden_dims=[128, 128],
activation="elu",
conv_layers_params=[
{"out_channels": 4, "kernel_size": 3, "stride": 2, "padding": 1},
# {"out_channels": 8, "kernel_size": 3, "stride": 2},
{"out_channels": 16, "kernel_size": 3, "stride": 2},
],
conv_linear_output_size=128, # Project 128×13×10 into 128-dim
pointnet_layers_params=[
{"out_channels": 64},
{"out_channels": 256},
],
pointnet_in_dim=8,
pointnet_num_points=512,
)
algorithm = RslRlGCUPpoAlgorithmCfg(
value_loss_coef=0.5,
use_clipped_value_loss=True,
clip_param=0.2,
placement_entropy_coef=0.0,
orientation_entropy_coef=0.0,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
schedule="adaptive",
gamma=0.99,
lam=0.95,
desired_kl=0.005,
max_grad_norm=1.0,
)