diff --git a/hci/user-in-the-box/uitb/__init__.py b/hci/user-in-the-box/uitb/__init__.py new file mode 100644 index 00000000..fdb691f0 --- /dev/null +++ b/hci/user-in-the-box/uitb/__init__.py @@ -0,0 +1 @@ +from uitb.simulator import Simulator \ No newline at end of file diff --git a/hci/user-in-the-box/uitb/bm_models/__init__.py b/hci/user-in-the-box/uitb/bm_models/__init__.py new file mode 100644 index 00000000..8e956c03 --- /dev/null +++ b/hci/user-in-the-box/uitb/bm_models/__init__.py @@ -0,0 +1,5 @@ +from .mobl_arms_index.MoblArmsIndex import MoblArmsIndex +from .mobl_arms.MoblArms import MoblArms +from .mobl_arms_wrist.MoblArmsWrist import MoblArmsWrist +from .mobl_arms_bimanual.MoblArmsBimanual import MoblArmsBimanual +from .mobl_arms_bimanual_motor.MoblArmsBimanualMotor import MoblArmsBimanualMotor diff --git a/hci/user-in-the-box/uitb/bm_models/base.py b/hci/user-in-the-box/uitb/bm_models/base.py new file mode 100644 index 00000000..a7dc9521 --- /dev/null +++ b/hci/user-in-the-box/uitb/bm_models/base.py @@ -0,0 +1,368 @@ +import numpy as np +import xml.etree.ElementTree as ET +import pathlib +import os +import shutil +import inspect +import mujoco +from abc import ABC, abstractmethod +import importlib +from typing import final + +from ..utils.functions import parent_path +from ..utils import element_tree as ETutils + + +class BaseBMModel(ABC): + + def __init__(self, model, data, **kwargs): + """Initializes a new `BaseBMModel`. + + Args: + model: Mujoco model instance of the simulator. + data: Mujoco data instance of the simulator. 
+ **kwargs: Many keywords that should be documented somewhere + """ + + # Initialise mujoco model of the biomechanical model, easier to manipulate things + bm_model = mujoco.MjModel.from_xml_path(self.get_xml_file()) + + # Get an rng + self._rng = np.random.default_rng(kwargs.get("random_seed", None)) + + # Total number of actuators + self._nu = bm_model.nu + + # Number of muscle actuators + self._na = bm_model.na + + # Number of motor actuators + self._nm = self._nu - self._na + self._motor_act = np.zeros((self._nm,)) + self._motor_alpha = 0.9 + + # Get actuator names (muscle and motor) + self._actuator_names = [mujoco.mj_id2name(bm_model, mujoco.mjtObj.mjOBJ_ACTUATOR, i) for i in range(bm_model.nu)] + self._muscle_actuator_names = set(np.array(self._actuator_names)[bm_model.actuator_trntype==3]) + self._motor_actuator_names = set(self._actuator_names) - self._muscle_actuator_names + + # Sort the names to preserve original ordering (not really necessary but looks nicer) + self._muscle_actuator_names = sorted(self._muscle_actuator_names, key=self._actuator_names.index) + self._motor_actuator_names = sorted(self._motor_actuator_names, key=self._actuator_names.index) + + # Find actuator indices in the simulation + self._muscle_actuators = [mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_ACTUATOR, actuator_name) + for actuator_name in self._muscle_actuator_names] + self._motor_actuators = [mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_ACTUATOR, actuator_name) + for actuator_name in self._motor_actuator_names] + + # Get joint names (dependent and independent) + self._joint_names = [mujoco.mj_id2name(bm_model, mujoco.mjtObj.mjOBJ_JOINT, i) for i in range(bm_model.njnt)] + _eq_on = getattr(bm_model, "eq_active", None) + if _eq_on is None: + _eq_on = getattr(bm_model, "eq_active0", np.ones(bm_model.neq)) + _eq_on = np.asarray(_eq_on).astype(bool) + self._dependent_joint_names = {self._joint_names[idx] for idx in + np.unique(bm_model.eq_obj1id[_eq_on])} \ + if 
bm_model.eq_obj1id is not None else set() + self._independent_joint_names = set(self._joint_names) - self._dependent_joint_names + + # Sort the names to preserve original ordering (not really necessary but looks nicer) + self._dependent_joint_names = sorted(self._dependent_joint_names, key=self._joint_names.index) + self._independent_joint_names = sorted(self._independent_joint_names, key=self._joint_names.index) + + # Find dependent and independent joint indices in the simulation + self._dependent_joints = [mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, joint_name) + for joint_name in self._dependent_joint_names] + self._independent_joints = [mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, joint_name) + for joint_name in self._independent_joint_names] + + # If there are 'free' type of joints, we'll need to be more careful with which dof corresponds to + # which joint, for both qpos and qvel/qacc. There should be exactly one dof per independent/dependent joint. + def get_dofs(joint_indices): + qpos = [] + dofs = [] + for joint_idx in joint_indices: + if model.jnt_type[joint_idx] not in [mujoco.mjtJoint.mjJNT_HINGE, mujoco.mjtJoint.mjJNT_SLIDE]: + raise NotImplementedError(f"Only 'hinge' and 'slide' joints are supported, joint " + f"{self._joint_names[joint_idx]} is of type {mujoco.mjtJoint(model.jnt_type[joint_idx]).name}") + qpos.append(model.jnt_qposadr[joint_idx]) + dofs.append(model.jnt_dofadr[joint_idx]) + return qpos, dofs + self._dependent_qpos, self._dependent_dofs = get_dofs(self._dependent_joints) + self._independent_qpos, self._independent_dofs = get_dofs(self._independent_joints) + + # Get the effort model; some models might need to know dt + self._effort_model = self.get_effort_model(kwargs.get("effort_model", {"cls": "Zero"}), dt=kwargs["dt"]) + + # Define signal-dependent noise + self._sigdepnoise_type = kwargs.get("sigdepnoise_type", None) #"white") + self._sigdepnoise_level = kwargs.get("sigdepnoise_level", 0.103) + self._sigdepnoise_rng = 
np.random.default_rng(kwargs.get("random_seed", None)) + self._sigdepnoise_acc = 0 #only used for red/Brownian noise + + # Define constant (i.e., signal-independent) noise + self._constantnoise_type = kwargs.get("constantnoise_type", None) #"white") + self._constantnoise_level = kwargs.get("constantnoise_level", 0.185) + self._constantnoise_rng = np.random.default_rng(kwargs.get("random_seed", None)) + self._constantnoise_acc = 0 #only used for red/Brownian noise + + ############ The methods below you should definitely overwrite as they are important ############ + + @classmethod + @abstractmethod + def _get_floor(cls): + """ If there's a floor in the bm_model.xml file it should be defined here. + + Returns: + * None if there is no floor in the file + * A dict like {"tag": "geom", "name": "name-of-the-geom"}, where "tag" indicates what kind of element the floor + is, and "name" is the name of the element. + """ + pass + + + ############ The methods below are overwritable but often don't need to be overwritten ############ + + def _reset(self, model, data): + """ Resets the biomechanical model. 
""" + + # Randomly sample qpos, qvel, act around zero values + nq = len(self._independent_qpos) + qpos = self._rng.uniform(low=np.ones((nq,))*-0.05, high=np.ones((nq,))*0.05) + qvel = self._rng.uniform(low=np.ones((nq,))*-0.05, high=np.ones((nq,))*0.05) + act = self._rng.uniform(low=np.zeros((self._na,)), high=np.ones((self._na,))) + + # Set qpos and qvel + data.qpos[self._dependent_qpos] = 0 + data.qpos[self._independent_qpos] = qpos + data.qvel[self._dependent_dofs] = 0 + data.qvel[self._independent_dofs] = qvel + data.act[self._muscle_actuators] = act + + # Sample random initial values for motor activation + self._motor_act = self._rng.uniform(low=np.zeros((self._nm,)), high=np.ones((self._nm,))) + # Reset smoothed average of motor actuator activation + self._motor_smooth_avg = np.zeros((self._nm,)) + + # Reset accumulative noise + self._sigdepnoise_acc = 0 + self._constantnoise_acc = 0 + + def _update(self, model, data): + """ Update the biomechanical model after a step has been taken in the simulator. """ + pass + + def _get_state(self, model, data): + """ Return the state of the biomechanical model. These states are used only for logging/evaluation, not for RL + training + + Args: + model: Mujoco model instance of the simulator. + data: Mujoco data instance of the simulator. + + Returns: + A dict where each key should have a float or a numpy vector as their value + """ + return dict() + + def set_ctrl(self, model, data, action): + """ Set control values for the biomechanical model. + + Args: + model: Mujoco model instance of the simulator. + data: Mujoco data instance of the simulator. 
+ action: Action values between [-1, 1] + + """ + + _selected_motor_control = np.clip(self._motor_act + action[:self._nm], 0, 1) + _selected_muscle_control = np.clip(data.act[self._muscle_actuators] + action[self._nm:], 0, 1) + + if self._sigdepnoise_type is not None: + if self._sigdepnoise_type == "white": + _added_noise = self._sigdepnoise_level*self._sigdepnoise_rng.normal(scale=_selected_muscle_control) + _selected_muscle_control += _added_noise + elif self._sigdepnoise_type == "whiteonly": #only for debugging purposes + _selected_muscle_control = self._sigdepnoise_level*self._sigdepnoise_rng.normal(scale=_selected_muscle_control) + elif self._sigdepnoise_type == "red": + # self._sigdepnoise_acc *= 1 - 0.1 + self._sigdepnoise_acc += self._sigdepnoise_level*self._sigdepnoise_rng.normal(scale=_selected_muscle_control) + _selected_muscle_control += self._sigdepnoise_acc + else: + raise NotImplementedError(f"{self._sigdepnoise_type}") + if self._constantnoise_type is not None: + if self._constantnoise_type == "white": + _selected_muscle_control += self._constantnoise_level*self._constantnoise_rng.normal(scale=1) + elif self._constantnoise_type == "whiteonly": #only for debugging purposes + _selected_muscle_control = self._constantnoise_level*self._constantnoise_rng.normal(scale=1) + elif self._constantnoise_type == "red": + self._constantnoise_acc += self._constantnoise_level*self._constantnoise_rng.normal(scale=1) + _selected_muscle_control += self._constantnoise_acc + else: + raise NotImplementedError(f"{self._constantnoise_type}") + + # Update smoothed online estimate of motor actuation + self._motor_act = (1 - self._motor_alpha) * self._motor_act \ + + self._motor_alpha * np.clip(_selected_motor_control, 0, 1) + + data.ctrl[self._motor_actuators] = model.actuator_ctrlrange[self._motor_actuators,0] + self._motor_act*(model.actuator_ctrlrange[self._motor_actuators, 1] - model.actuator_ctrlrange[self._motor_actuators, 0]) + data.ctrl[self._muscle_actuators] = 
np.clip(_selected_muscle_control, 0, 1) + + + @classmethod + def get_xml_file(cls): + """ Overwrite this method if you want to call the mujoco xml file something other than 'bm_model.xml'. """ + return os.path.join(parent_path(inspect.getfile(cls)), "bm_model.xml") + + def get_effort_model(self, specs, dt): + """ Returns an initialised object of the effort model class. + + Overwrite this method if you want to define your effort models somewhere else. But note that in that case you need + to overwrite the 'clone' method as well since it assumes the effort models are defined in + uitb.bm_models.effort_models. + + Args: + specs: Specifications of the effort model, in format of + {"cls": "name-of-class", "kwargs": {"kw1": value1, "kw2": value2}}} + dt: Elapsed time between two consecutive simulation steps + + Returns: + An instance of a class that inherits from the uitb.bm_models.effort_models.BaseEffortModel class + """ + module = importlib.import_module(".".join(BaseBMModel.__module__.split(".")[:-1]) + ".effort_models") + return getattr(module, specs["cls"])(self, **{**specs.get("kwargs", {}), **{"dt": dt}}) + + @classmethod + def clone(cls, simulator_folder, package_name): + """ Clones (i.e. copies) the relevant python files into a new location. + + Args: + simulator_folder: Location of the simulator. + package_name: Name of the simulator (which is a python package) + """ + + # Create 'bm_models' folder + dst = os.path.join(simulator_folder, package_name, "bm_models") + os.makedirs(dst, exist_ok=True) + + # Copy this file + base_file = pathlib.Path(__file__) + shutil.copyfile(base_file, os.path.join(dst, base_file.name)) + + # Create an __init__.py file with the relevant import + modules = cls.__module__.split(".") + with open(os.path.join(dst, "__init__.py"), "w") as file: + file.write("from ." 
+ ".".join(modules[2:]) + " import " + cls.__name__) + + # Copy bm-model folder + src = parent_path(inspect.getfile(cls)) + shutil.copytree(src, os.path.join(dst, src.stem), dirs_exist_ok=True) + + # Copy assets + shutil.copytree(os.path.join(src, "assets"), os.path.join(simulator_folder, package_name, "assets"), + dirs_exist_ok=True) + + # Copy effort models + shutil.copyfile(os.path.join(base_file.parent, "effort_models.py"), os.path.join(dst, "effort_models.py")) + + @classmethod + def insert(cls, simulator_tree): + """ Inserts the biomechanical model into the simulator by integrating the xml files together. + + Args: + simulator_tree: An `xml.etree.ElementTree` containing the parsed simulator xml file + """ + + # Parse xml file + bm_tree = ET.parse(cls.get_xml_file()) + bm_root = bm_tree.getroot() + + # Get simulator root + simulator_root = simulator_tree.getroot() + + # Add defaults + ETutils.copy_or_append("default", bm_root, simulator_root) + + # Add assets, except skybox + ETutils.copy_children("asset", bm_root, simulator_root, + exclude={"tag": "texture", "attrib": "type", "name": "skybox"}) + + # Add bodies, except floor/ground TODO this might not be currently working + if cls._get_floor() is not None: + floor = cls._get_floor() + ETutils.copy_children("worldbody", bm_root, simulator_root, + exclude={"tag": floor["tag"], "attrib": "name", "name": floor["name"]}) + else: + ETutils.copy_children("worldbody", bm_root, simulator_root) + + # Add tendons + ETutils.copy_children("tendon", bm_root, simulator_root) + + # Add actuators + ETutils.copy_children("actuator", bm_root, simulator_root) + + # Add equality constraints + ETutils.copy_children("equality", bm_root, simulator_root) + + def close(self, **kwargs): + """ Perform any necessary clean up. """ + pass + + ############ The methods below you should not overwrite ############ + + @final + def update(self, model, data): + """ Updates the biomechanical model and effort model. 
""" + self._update(model, data) + self._effort_model.update(model, data) + + @final + def reset(self, model, data): + """ Resets the biomechanical model and effort model. """ + self._reset(model, data) + self._effort_model.reset(model, data) + self.update(model, data) + mujoco.mj_forward(model, data) + + @final + def get_state(self, model, data): + """ Returns the state of the biomechanical model (as a dict). """ + state = dict() + state.update(self._get_state(model, data)) + state.update(self._effort_model._get_state(model, data)) + return state + + @final + def get_effort_cost(self, model, data): + """ Returns effort cost from the effort model. """ + return self._effort_model.cost(model, data) + + @property + @final + def independent_joints(self): + """ Returns indices of independent joints. """ + return self._independent_joints.copy() + + @property + @final + def independent_qpos(self): + """ Returns qpos indices of independent joints. """ + return self._independent_qpos.copy() + + @property + @final + def independent_dofs(self): + """ Returns qvel/qacc indices of independent joints. """ + return self._independent_dofs.copy() + + @property + @final + def nu(self): + """ Returns number of actuators (both muscle and motor). """ + return self._nu + + @property + def motor_act(self): + """ Returns (smoothed average of) motor actuation. 
""" + return self._motor_act diff --git a/hci/user-in-the-box/uitb/bm_models/effort_models.py b/hci/user-in-the-box/uitb/bm_models/effort_models.py new file mode 100644 index 00000000..040a2040 --- /dev/null +++ b/hci/user-in-the-box/uitb/bm_models/effort_models.py @@ -0,0 +1,305 @@ +from abc import ABC, abstractmethod +import mujoco +import numpy as np + + +class BaseEffortModel(ABC): + + def __init__(self, bm_model, **kwargs): + self._bm_model = bm_model + + @abstractmethod + def cost(self, model, data): + pass + + @abstractmethod + def reset(self, model, data): + pass + + @abstractmethod + def update(self, model, data): + # If needed to e.g. reduce max force output + pass + + def _get_state(self, model, data): + """ Return the state of the effort model. These states are used only for logging/evaluation, not for RL + training + + Args: + model: Mujoco model instance of the simulator. + data: Mujoco data instance of the simulator. + + Returns: + A dict where each key should have a float or a numpy vector as their value + """ + return dict() + + +class Composite(BaseEffortModel): + + def __init__(self, bm_model, weight=1e-7, **kwargs): + super().__init__(bm_model) + self._weight = weight + + def cost(self, model, data): + mujoco.cymj._mj_inverse(model, data) # TODO does this work with new mujoco python bindings? 
+ angle_acceleration = np.sum(data.qacc[self._bm_model.independent_dofs] ** 2) + energy = np.sum(data.qvel[self._bm_model.independent_dofs] ** 2 + * data.qfrc_inverse[self._bm_model.independent_dofs] ** 2) + return self._weight * (energy + 0.05 * angle_acceleration) + + def reset(self, model, data): + pass + + def update(self, model, data): + pass + + +class CumulativeFatigue(BaseEffortModel): + + # 3CC-r model, adapted from https://dl.acm.org/doi/pdf/10.1145/3313831.3376701 for muscles -- using control signals + # instead of torque or force + def __init__(self, bm_model, dt, **kwargs): + super().__init__(bm_model) + self._r = 7.5 + self._F = 0.0146 + self._R = 0.0022 + self._LD = 10 + self._LR = 10 + self._MA = None + self._MR = None + self._weight = 0.01 + self._dt = dt + + def cost(self, model, data): + + # Initialise MA if not yet initialised + if self._MA is None: + self._MA = np.zeros((model.na,)) + self._MR = np.ones((model.na,)) + + # Get target load (actual activation, which might be reached only with some "effort", depending on how many muscles can be activated (fast enough) and how many are in fatigue state) + TL = data.act.copy() + + # Calculate C(t) + C = np.zeros_like(self._MA) + idxs = (self._MA < TL) & (self._MR > (TL - self._MA)) + C[idxs] = self._LD * (TL[idxs] - self._MA[idxs]) + idxs = (self._MA < TL) & (self._MR <= (TL - self._MA)) + C[idxs] = self._LD * self._MR[idxs] + idxs = self._MA >= TL + C[idxs] = self._LR * (TL[idxs] - self._MA[idxs]) + + # Calculate rR + rR = np.zeros_like(self._MA) + idxs = self._MA >= TL + rR[idxs] = self._r*self._R + idxs = self._MA < TL + rR[idxs] = self._R + + # Calculate MA, MR + self._MA += (C - self._F*self._MA)*self._dt + self._MR += (-C + rR*self._MR)*self._dt + + # Not sure if these are needed + self._MA = np.clip(self._MA, 0, 1) + self._MR = np.clip(self._MR, 0, 1) + + # Calculate effort + effort = np.linalg.norm(self._MA - TL) + + return self._weight*effort + + def reset(self, model, data): + self._MA = 
None + self._MR = None + + def update(self, model, data): + pass + + +class CumulativeFatigue3CCr(BaseEffortModel): + + # 3CC-r model, adapted from https://dl.acm.org/doi/pdf/10.1145/3313831.3376701 for muscles -- using control signals + # instead of torque or force + # v2: now with additional muscle fatigue state + def __init__(self, bm_model, dt, weight=0.01, **kwargs): + super().__init__(bm_model) + self._r = 7.5 + self._F = 0.0146 + self._R = 0.0022 + self._LD = 10 + self._LR = 10 + self._MA = None + self._MR = None + self._MF = None + self._TL = None + self._dt = dt + self._weight = weight + self._effort_cost = None + + def cost(self, model, data): + # Calculate effort + effort = np.linalg.norm(self._MA - self._TL) + self._effort_cost = self._weight*effort + return self._effort_cost + + def reset(self, model, data): + self._MA = np.zeros((model.na,)) + self._MR = np.ones((model.na,)) + self._MF = np.zeros((model.na,)) + + def update(self, model, data): + # Get target load + TL = data.act.copy() + self._TL = TL + + # Calculate C(t) + C = np.zeros_like(self._MA) + idxs = (self._MA < TL) & (self._MR > (TL - self._MA)) + C[idxs] = self._LD * (TL[idxs] - self._MA[idxs]) + idxs = (self._MA < TL) & (self._MR <= (TL - self._MA)) + C[idxs] = self._LD * self._MR[idxs] + idxs = self._MA >= TL + C[idxs] = self._LR * (TL[idxs] - self._MA[idxs]) + + # Calculate rR + rR = np.zeros_like(self._MA) + idxs = self._MA >= TL + rR[idxs] = self._r*self._R + idxs = self._MA < TL + rR[idxs] = self._R + + # Clip C(t) if needed, to ensure that MA, MR, and MF remain between 0 and 1 + C = np.clip(C, np.maximum(-self._MA/self._dt + self._F*self._MA, (self._MR - 1)/self._dt + rR*self._MF), + np.minimum((1 - self._MA)/self._dt + self._F*self._MA, self._MR/self._dt + rR*self._MF)) + + # Update MA, MR, MF + dMA = (C - self._F*self._MA)*self._dt + dMR = (-C + rR*self._MF)*self._dt + dMF = (self._F*self._MA - rR*self._MF)*self._dt + self._MA += dMA + self._MR += dMR + self._MF += dMF + + def 
_get_state(self, model, data): + state = {"3CCr_MA": self._MA, + "3CCr_MR": self._MR, + "3CCr_MF": self._MF, + "effort_cost": self._effort_cost} + return state + + +class ConsumedEndurance(BaseEffortModel): + + lifting_muscles = ["DELT1", "DELT2", "DELT3", "SUPSP", "INFSP", "SUBSC", "TMIN", "BIClong", "BICshort", "TRIlong", "TRIlat", "TRImed"] + + # consumed endurance model, taken from https://dl.acm.org/doi/pdf/10.1145/2556288.2557130 + def __init__(self, bm_model, dt, weight=0.01, **kwargs): + super().__init__(bm_model) + self._dt = dt + self._weight = weight + self._endurance = None + self._consumed_endurance = None + self._effort_cost = None + + def get_endurance(self, model, data): + #applied_shoulder_torque = np.linalg.norm(data.qfrc_inverse[:]) + #applied_shoulder_torque = np.linalg.norm(data.qfrc_actuator[:]) + lifting_indices = [model.actuator(_i).id for _i in self.lifting_muscles] + applied_shoulder_torques = data.actuator_force[lifting_indices] + maximum_shoulder_torques = model.actuator_gainprm[lifting_indices, 2] + #assert np.all(applied_shoulder_torque <= 0), "Expected only negative values in data.actuator_force." + #strength = np.mean((applied_shoulder_torques/maximum_shoulder_torques)**2) + strength = np.abs(applied_shoulder_torques/maximum_shoulder_torques) #compute strength per muscle + # assert np.all(strength <= 1), f"Applied torque is larger than maximum torque! 
strength:{strength}, applied:{applied_shoulder_torques}, max:{maximum_shoulder_torques}" + strength = strength.clip(0, 1) + + # if strength > 0.15: + # endurance = (1236.5/((strength*100 - 15)**0.618)) - 72.5 + # else: + # endurance = np.inf + + endurance = np.inf * np.ones_like(strength) + endurance[strength > 0.15] = (1236.5/((strength[strength > 0.15]*100 - 15)**0.618)) - 72.5 + + minimum_endurance = np.min(endurance) + # TODO: take minimum of each muscle synergy, and then apply sum/mean + + return minimum_endurance + + def cost(self, model, data): + # Calculate consumed endurance + self._endurance = self.get_endurance(model, data) + #total_time = data.time + consumed_time = self._dt + + if self._endurance < np.inf: + self._consumed_endurance = (consumed_time/self._endurance)*100 + else: + self._consumed_endurance = 0.0 + + self._effort_cost = self._weight*self._consumed_endurance + return self._effort_cost + + def reset(self, model, data): + #WARNING: bm_model.reset() should reset simulation time (i.e., data.time==0 before the next costs are calculated) + pass + + def update(self, model, data): + pass + + def _get_state(self, model, data): + state = {"consumed_endurance": self._consumed_endurance, + "effort_cost": self._effort_cost} + return state + + +class MuscleState(BaseEffortModel): + + def __init__(self, bm_model, weight=1e-4, **kwargs): + super().__init__(bm_model) + self._weight = weight + + def cost(self, model, data): + return self._weight * np.sum(data.act ** 2) + + def reset(self, model, data): + pass + + def update(self, model, data): + pass + + +class Neural(BaseEffortModel): + + def __init__(self, bm_model, weight=1e-4, **kwargs): + super().__init__(bm_model) + self._weight = weight + self._effort_cost = None + + def cost(self, model, data): + self._effort_cost = self._weight * np.sum(data.ctrl ** 2) + return self._effort_cost + + def reset(self, model, data): + pass + + def update(self, model, data): + pass + + def _get_state(self, model, data): 
+ state = {"effort_cost": self._effort_cost} + return state + + +class Zero(BaseEffortModel): + + def cost(self, model, data): + return 0 + + def reset(self, model, data): + pass + + def update(self, model, data): + pass \ No newline at end of file diff --git a/hci/user-in-the-box/uitb/bm_models/mobl_arms/MoblArms.py b/hci/user-in-the-box/uitb/bm_models/mobl_arms/MoblArms.py new file mode 100644 index 00000000..d9e9ba75 --- /dev/null +++ b/hci/user-in-the-box/uitb/bm_models/mobl_arms/MoblArms.py @@ -0,0 +1,59 @@ +from ..base import BaseBMModel +from ...utils.functions import initialise_pos_and_quat + +import numpy as np +import mujoco + + +class MoblArms(BaseBMModel): + """This model is based on the MoBL ARMS model, see https://simtk.org/frs/?group_id=657 for the original model in OpenSim, + and https://github.com/aikkala/O2MConverter for the MuJoCo converted model. """ + + def __init__(self, model, data, **kwargs): + super().__init__(model, data, **kwargs) + + # Set shoulder variant; use "none" as default, use "patch-v1" for a qualitatively more reasonable looking movements (not thoroughly tested) + self.shoulder_variant = kwargs.get("shoulder_variant", "none") + + # Update skull rotation if necessary + if "skull_rotation" in kwargs: + + # Rotate skull + model.body("skull").quat = kwargs["skull_rotation"] + + # Do a forward + mujoco.mj_forward(model, data) + + # Check if there are any bodies whose positions need to be updated + # must be 1) weld equality constraint, and 2) obj2_id refer to the skull body. + # It would be nicer if each equality were updated in their respective classes (like unity headset in the unity + # task), but cannot be done with current ordering of task -> bm model -> perception + cond1 = model.eq_type == mujoco.mjtEq.mjEQ_WELD + cond2 = model.eq_obj2id == mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, "skull") + idxs = np.where(np.logical_and(cond1, cond2))[0] + + # Initialise pos and quat according to the relpose for each found body. 
+ for idx in idxs: + initialise_pos_and_quat(model, data, "skull", model.eq_data[idx][3:10], + mujoco.mj_id2name(model, mujoco.mjtObj.mjOBJ_BODY, model.eq_obj1id[idx])) + + def _update(self, model, data): + + # Update shoulder equality constraints + if self.shoulder_variant.startswith("patch"): + model.equality("shoulder1_r2_con").data[1] = \ + -((np.pi - 2 * data.joint('shoulder_elv').qpos) / np.pi) + + if self.shoulder_variant == "patch-v2": + data.joint('shoulder_rot').range[:] = \ + np.array([-np.pi / 2, np.pi / 9]) - \ + 2 * np.min((data.joint('shoulder_elv').qpos, + np.pi - data.joint('shoulder_elv').qpos)) / np.pi \ + * data.joint('elv_angle').qpos + + # Do a forward calculation + mujoco.mj_forward(model, data) + + @classmethod + def _get_floor(cls): + return None diff --git a/hci/user-in-the-box/uitb/bm_models/mobl_arms/__init__.py b/hci/user-in-the-box/uitb/bm_models/mobl_arms/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hci/user-in-the-box/uitb/bm_models/mobl_arms/bm_model.xml b/hci/user-in-the-box/uitb/bm_models/mobl_arms/bm_model.xml new file mode 100644 index 00000000..a1c3886d --- /dev/null +++ b/hci/user-in-the-box/uitb/bm_models/mobl_arms/bm_model.xml @@ -0,0 +1,669 @@ + + + diff --git a/hci/user-in-the-box/uitb/bm_models/mobl_arms_bimanual/MoblArmsBimanual.py b/hci/user-in-the-box/uitb/bm_models/mobl_arms_bimanual/MoblArmsBimanual.py new file mode 100644 index 00000000..3d81a303 --- /dev/null +++ b/hci/user-in-the-box/uitb/bm_models/mobl_arms_bimanual/MoblArmsBimanual.py @@ -0,0 +1,67 @@ +from ..base import BaseBMModel +from ...utils.functions import initialise_pos_and_quat + +import numpy as np +import mujoco + + +class MoblArmsBimanual(BaseBMModel): + + def __init__(self, model, data, **kwargs): + super().__init__(model, data, **kwargs) + + # Set shoulder variant + self.shoulder_variant = kwargs.get("shoulder_variant", "none") + if self.shoulder_variant in ["patch-v1", "patch-v2"]: + print("[MoblArmsBimanual.__init__] 
Warning, 'patch-v1' and 'patch-v2' have not been tested, you should check them out yourself") + + # Update skull rotation if necessary + if "skull_rotation" in kwargs: + + # Rotate skull + model.body("skull").quat = kwargs["skull_rotation"] + + # Do a forward + mujoco.mj_forward(model, data) + + # Check if there are any bodies whose positions need to be updated + # must be 1) weld equality constraint, and 2) obj2_id refer to the skull body. + # It would be nicer if each equality were updated in their respective classes (like unity headset in the unity + # task), but cannot be done with current ordering of task -> bm model -> perception + cond1 = model.eq_type == mujoco.mjtEq.mjEQ_WELD + cond2 = model.eq_obj2id == mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, "skull") + idxs = np.where(np.logical_and(cond1, cond2))[0] + + # Initialise pos and quat according to the relpose for each found body. + for idx in idxs: + initialise_pos_and_quat(model, data, "skull", model.eq_data[idx][3:10], + mujoco.mj_id2name(model, mujoco.mjtObj.mjOBJ_BODY, model.eq_obj1id[idx])) + + def _update(self, model, data): + + # Update shoulder equality constraints + # NOTE! 
class MoblArmsBimanualMotor(BaseBMModel):
  """Bimanual MoBL ARMS model in which both arms are driven by motor actuators."""

  def __init__(self, model, data, **kwargs):
    """Initialise the model.

    Args:
      model: Mujoco model instance of the simulator.
      data: Mujoco data instance of the simulator.
      **kwargs: Optional keywords:
        shoulder_variant: "none" (default), "patch-v1", or "patch-v2" (the patches are untested).
        skull_rotation: quaternion (w, x, y, z) applied to the "skull" body.
    """
    super().__init__(model, data, **kwargs)

    # Set shoulder variant
    self.shoulder_variant = kwargs.get("shoulder_variant", "none")
    if self.shoulder_variant in ["patch-v1", "patch-v2"]:
      print("[MoblArmsBimanualMotor.__init__] Warning, 'patch-v1' and 'patch-v2' have not been tested, you should check them out yourself")

    # Update skull rotation if necessary
    if "skull_rotation" in kwargs:

      # Rotate skull
      model.body("skull").quat = kwargs["skull_rotation"]

      # Do a forward so dependent quantities reflect the new skull orientation
      mujoco.mj_forward(model, data)

      # Check if there are any bodies whose positions need to be updated:
      # must be 1) weld equality constraint, and 2) obj2_id refers to the skull body.
      # It would be nicer if each equality were updated in their respective classes (like unity headset in the
      # unity task), but cannot be done with current ordering of task -> bm model -> perception
      cond1 = model.eq_type == mujoco.mjtEq.mjEQ_WELD
      cond2 = model.eq_obj2id == mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, "skull")
      idxs = np.where(np.logical_and(cond1, cond2))[0]

      # Initialise pos and quat according to the relpose for each found body.
      # eq_data[idx][3:10] is the weld relpose: 3 position + 4 quaternion components.
      for idx in idxs:
        initialise_pos_and_quat(model, data, "skull", model.eq_data[idx][3:10],
                                mujoco.mj_id2name(model, mujoco.mjtObj.mjOBJ_BODY, model.eq_obj1id[idx]))

  def _update(self, model, data):
    """Update pose-dependent shoulder equality constraints and joint ranges.

    Args:
      model: Mujoco model instance of the simulator.
      data: Mujoco data instance of the simulator.
    """

    # Update shoulder equality constraints
    # NOTE! The left arm updates have not been tested
    if self.shoulder_variant.startswith("patch"):
      model.equality("shoulder1_r2_con_r").data[1] = \
        -((np.pi - 2 * data.joint('shoulder_elv_r').qpos) / np.pi)
      model.equality("shoulder1_r2_con_l").data[1] = \
        -((np.pi - 2 * data.joint('shoulder_elv_l').qpos) / np.pi)

      if self.shoulder_variant == "patch-v2":
        # BUGFIX: joint ranges are model quantities (mjModel.jnt_range); mjData joint
        # views expose no `.range`, so they must be modified through `model`, not `data`.
        model.joint('shoulder_rot_r').range[:] = \
          np.array([-np.pi / 2, np.pi / 9]) - \
          2 * np.min((data.joint('shoulder_elv_r').qpos,
                      np.pi - data.joint('shoulder_elv_r').qpos)) / np.pi \
          * data.joint('elv_angle_r').qpos
        model.joint('shoulder_rot_l').range[:] = \
          np.array([-np.pi / 2, np.pi / 9]) - \
          2 * np.min((data.joint('shoulder_elv_l').qpos,
                      np.pi - data.joint('shoulder_elv_l').qpos)) / np.pi \
          * data.joint('elv_angle_l').qpos

      # Do a forward calculation (only needed when constraints/ranges were modified)
      mujoco.mj_forward(model, data)

  @classmethod
  def _get_floor(cls):
    # This biomechanical model does not provide a floor element
    return None
class MoblArmsIndex(BaseBMModel):
  """This model is based on the MoBL ARMS model, see https://simtk.org/frs/?group_id=657 for the original model
  in OpenSim, and https://github.com/aikkala/O2MConverter for the MuJoCo converted model. This model is the same
  as the one in uitb/bm_models/mobl_arms, except the index finger is flexed and it contains a force sensor."""

  def __init__(self, model, data, **kwargs):
    """Initialise the model.

    Args:
      model: Mujoco model instance of the simulator.
      data: Mujoco data instance of the simulator.
      **kwargs: May contain "shoulder_variant" (str).
    """
    super().__init__(model, data, **kwargs)

    # Set shoulder variant; use "none" as default, use "patch-v1" for a qualitatively more reasonable looking
    # movements (not thoroughly tested)
    self.shoulder_variant = kwargs.get("shoulder_variant", "none")

  def _update(self, model, data):
    """Update pose-dependent shoulder equality constraints and joint ranges.

    Args:
      model: Mujoco model instance of the simulator.
      data: Mujoco data instance of the simulator.
    """

    # Update shoulder equality constraints
    if self.shoulder_variant.startswith("patch"):
      model.equality("shoulder1_r2_con").data[1] = \
        -((np.pi - 2 * data.joint('shoulder_elv').qpos) / np.pi)

      if self.shoulder_variant == "patch-v2":
        # BUGFIX: joint ranges are model quantities (mjModel.jnt_range); mjData joint
        # views expose no `.range`, so the range must be modified through `model`, not `data`.
        model.joint('shoulder_rot').range[:] = \
          np.array([-np.pi / 2, np.pi / 9]) - \
          2 * np.min((data.joint('shoulder_elv').qpos,
                      np.pi - data.joint('shoulder_elv').qpos)) / np.pi \
          * data.joint('elv_angle').qpos

      # Do a forward calculation (only needed when constraints/ranges were modified)
      mujoco.mj_forward(model, data)

  @classmethod
  def _get_floor(cls):
    # This biomechanical model does not provide a floor element
    return None
class MoblArmsWrist(BaseBMModel):
  """MoBL ARMS model variant with an actuated wrist; supports rotating the skull and
  repositioning any bodies welded to it."""

  def __init__(self, model, data, **kwargs):
    """Initialise the model.

    Args:
      model: Mujoco model instance of the simulator.
      data: Mujoco data instance of the simulator.
      **kwargs: Optional keywords:
        shoulder_variant: "patch-v1" (default), "none", or "patch-v2".
        skull_rotation: quaternion (w, x, y, z) applied to the "skull" body.
    """
    super().__init__(model, data, **kwargs)

    # Set shoulder variant
    self.shoulder_variant = kwargs.get("shoulder_variant", "patch-v1")

    # Update skull rotation if necessary
    if "skull_rotation" in kwargs:

      # Rotate skull
      model.body("skull").quat = kwargs["skull_rotation"]

      # Do a forward so dependent quantities reflect the new skull orientation
      mujoco.mj_forward(model, data)

      # Check if there are any bodies whose positions need to be updated:
      # must be 1) weld equality constraint, and 2) obj2_id refers to the skull body.
      # It would be nicer if each equality were updated in their respective classes (like unity headset in the
      # unity task), but cannot be done with current ordering of task -> bm model -> perception
      cond1 = model.eq_type == mujoco.mjtEq.mjEQ_WELD
      cond2 = model.eq_obj2id == mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, "skull")
      idxs = np.where(np.logical_and(cond1, cond2))[0]

      # Initialise pos and quat according to the relpose for each found body.
      # eq_data[idx][3:10] is the weld relpose: 3 position + 4 quaternion components.
      for idx in idxs:
        initialise_pos_and_quat(model, data, "skull", model.eq_data[idx][3:10],
                                mujoco.mj_id2name(model, mujoco.mjtObj.mjOBJ_BODY, model.eq_obj1id[idx]))

  def _update(self, model, data):
    """Update pose-dependent shoulder equality constraints and joint ranges.

    Args:
      model: Mujoco model instance of the simulator.
      data: Mujoco data instance of the simulator.
    """

    # Update shoulder equality constraints
    if self.shoulder_variant.startswith("patch"):
      model.equality("shoulder1_r2_con").data[1] = \
        -((np.pi - 2 * data.joint('shoulder_elv').qpos) / np.pi)

      if self.shoulder_variant == "patch-v2":
        # BUGFIX: joint ranges are model quantities (mjModel.jnt_range); mjData joint
        # views expose no `.range`, so the range must be modified through `model`, not `data`.
        model.joint('shoulder_rot').range[:] = \
          np.array([-np.pi / 2, np.pi / 9]) - \
          2 * np.min((data.joint('shoulder_elv').qpos,
                      np.pi - data.joint('shoulder_elv').qpos)) / np.pi \
          * data.joint('elv_angle').qpos

      # Do a forward calculation (only needed when constraints/ranges were modified)
      mujoco.mj_forward(model, data)

  @classmethod
  def _get_floor(cls):
    # This biomechanical model does not provide a floor element
    return None
target_radius_limit: [0.018, 0.045] + max_trials: 8 + stateful_information_encoder: + module: rl.encoders + cls: Identity + + perception_modules: + - cls: vision.FixedEye + kwargs: + resolution: [120, 80] + channels: [3] + pos: "0 0 1.2" + quat: "0.583833 0.399104 -0.399421 -0.583368" + encoder: + module: rl.encoders + cls: SmallCNN + kwargs: + out_features: 256 + - cls: proprioception.BasicWithEndEffectorPosition + kwargs: + end_effector: [geom, hand_2distph] + encoder: + module: rl.encoders + cls: OneLayer + kwargs: + out_features: 128 + run_parameters: + action_sample_freq: 20 + +rl: + algorithm: PPO + policy_type: policies.MultiInputActorCriticPolicyTanhActions + policy_kwargs: + activation_fn: torch.nn.LeakyReLU + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: feature_extractor.FeatureExtractor + normalize_images: false + lr: + function: schedule.linear_schedule + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.85 + total_timesteps: 5_000_000 + device: cpu + num_workers: 4 + nsteps: 2048 + batch_size: 256 + target_kl: 1.0 + save_freq: 1_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_beatsvr_bimanual.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_beatsvr_bimanual.yaml new file mode 100644 index 00000000..cef14c3c --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_beatsvr_bimanual.yaml @@ -0,0 +1,66 @@ +simulator_name: "beatsvr_neural_1e3" +project: "unity" + +simulation: + bm_model: + cls: "MoblArmsBimanualMotor" + kwargs: + shoulder_variant: "none" + #skull_rotation: [0.9961947, 0, 0, -0.0871557] + effort_model: + cls: "Neural" + kwargs: + weight: 1e-3 + task: + cls: "UnityEnv" + kwargs: +# app_args: ["-right_hand_only"] + unity_executable: "apps/beats-vr-linux/build.x86_64" + standalone: true # set to False for debugging (open the app in Unity editor) + # port: 5555 # uncomment for debugging (connects to this port instead of finding a random open one) + time_scale: 5 + 
max_episode_length_seconds: 10 + gear: "oculus-quest-1" + left_controller_enabled: true + left_controller_body: "hand_l" + left_controller_relpose: [-0.06777743, 0.02224773, 0.00873933, 0.5382897, 0.72938, -0.3715395, 0.200518] + right_controller_body: "hand_r" + right_controller_relpose: [-0.06694103, -0.00423279, 0.00806701, 0.79349534, 0.49238653, -0.24941408, 0.256346] + headset_body: "skull" + headset_relpose: [-0.10703, 0.001594, -0.19141, 0.7071068, 0.7071068, 0, 0] + perception_modules: + - cls: "vision.UnityHeadset" + kwargs: + resolution: [120, 80] # must match dimensions used in unity + channels: [0,2] + buffer: 0.2 + use_buffer_difference: false + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: [["geom", "controller-right"], ["geom", "controller-left"]] + run_parameters: + action_sample_freq: 20 + info_keywords: [["AccumulatedErrors", "final"], ["AccumulatedCorrectSlices", "final"]] + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 100_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 1000 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_index_choice_reaction.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_index_choice_reaction.yaml new file mode 100644 index 00000000..c464acf9 --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_index_choice_reaction.yaml @@ -0,0 +1,51 @@ +simulator_name: "mobl_arms_index_choice_reaction" + +simulation: + bm_model: + cls: "MoblArmsIndex" + kwargs: + shoulder_variant: "none" + effort_model: + cls: "Neural" + kwargs: + weight: 1e-4 + task: + cls: 
"ChoiceReaction" + kwargs: + end_effector: ["geom", "hand_2distph"] + shoulder: ["body", "humphant"] + perception_modules: + - cls: "vision.FixedEye" + kwargs: + resolution: [120, 80] + channels: [0, 1, 2, 3] + pos: "0 0 1.2" + quat: "0.583833 0.399104 -0.399421 -0.583368" + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 20 + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 50_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 500 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_index_choice_reaction_HRL.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_index_choice_reaction_HRL.yaml new file mode 100644 index 00000000..17dcce14 --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_index_choice_reaction_HRL.yaml @@ -0,0 +1,57 @@ +simulator_name: "mobl_arms_index_choice_reaction_HRL" + +simulation: + bm_model: + cls: "MoblArmsIndex" + kwargs: + shoulder_variant: "none" + effort_model: + cls: "Neural" + kwargs: + weight: 1e-4 + task: + cls: "ChoiceReaction" + kwargs: + end_effector: ["geom", "hand_2distph"] + shoulder: ["body", "humphant"] + perception_modules: + - cls: "vision.FixedEye" + kwargs: + resolution: [120, 80] + channels: [0, 1, 2, 3] + pos: "0 0 1.2" + quat: "0.583833 0.399104 -0.399421 -0.583368" + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 20 + +rl: + algorithm: "PPO" + policy_type: 
"policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 15_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 500 + target_kl: 1.0 + save_freq: 5_000_000 + +llc: + llc_ratio: 1 + simulator_name: "mobl_arms_llc" + checkpoint: "model_270000000_steps.zip" + joints: ["elv_angle", "shoulder_elv", "shoulder_rot", "elbow_flexion", "pro_sup"] diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_index_pointing.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_index_pointing.yaml new file mode 100644 index 00000000..85dd6968 --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_index_pointing.yaml @@ -0,0 +1,71 @@ +simulator_name: "mobl_arms_index_pointing" + +simulation: + + bm_model: + cls: "MoblArmsIndex" + kwargs: + shoulder_variant: "none" + effort_model: + cls: "Neural" + kwargs: + weight: 1e-4 + + task: + cls: "Pointing" + kwargs: + end_effector: ["geom", "hand_2distph"] + shoulder: ["body", "humphant"] + # It's not necessary to define the stateful_information_encoder to Identity encoder, as that is used by default + stateful_information_encoder: + module: "rl.encoders" + cls: "Identity" + + perception_modules: + - cls: "vision.FixedEye" + kwargs: + resolution: [120, 80] + channels: [3] + pos: "0 0 1.2" + quat: "0.583833 0.399104 -0.399421 -0.583368" + # It's not necessary to define the encoder to SmallCNN, as vision.FixedEye uses that by default + encoder: + module: "rl.encoders" + cls: "SmallCNN" + kwargs: + out_features: 256 + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + # It's not necessary to define the encoder to OneLayer, as 
proprioception.BasicWithEndEffectorPosition uses that + # by default + encoder: + module: "rl.encoders" + cls: "OneLayer" + kwargs: + out_features: 128 + run_parameters: + action_sample_freq: 20 + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 100_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 500 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_index_pointing_HRL.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_index_pointing_HRL.yaml new file mode 100644 index 00000000..d6e08ca6 --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_index_pointing_HRL.yaml @@ -0,0 +1,71 @@ +simulator_name: "mobl_arms_index_pointing_HRL" + +simulation: + + bm_model: + cls: "MoblArmsIndex" + kwargs: + shoulder_variant: "none" + effort_model: + cls: "Neural" + kwargs: + weight: 1e-4 + + task: + cls: "Pointing" + kwargs: + end_effector: ["geom", "hand_2distph"] + shoulder: ["body", "humphant"] + # It's not necessary to define the stateful_information_encoder to Identity encoder, as that is used by default + stateful_information_encoder: + module: "rl.encoders" + cls: "Identity" + + perception_modules: + - cls: "vision.FixedEye" + kwargs: + resolution: [120, 80] + channels: [3] + pos: "0 0 1.2" + quat: "0.583833 0.399104 -0.399421 -0.583368" + # It's not necessary to define the encoder to SmallCNN, as vision.FixedEye uses that by default + encoder: + module: "rl.encoders" + cls: "SmallCNN" + kwargs: + out_features: 256 + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + + run_parameters: + 
action_sample_freq: 40 + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 40_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 500 + target_kl: 1.0 + save_freq: 5_000_000 + +llc: + llc_ratio: 1 + simulator_name: "mobl_arms_llc" + checkpoint: "model_270000000_steps.zip" + joints: ["elv_angle", "shoulder_elv", "shoulder_rot", "elbow_flexion", "pro_sup"] diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_index_pointing_precision.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_index_pointing_precision.yaml new file mode 100644 index 00000000..fc90d863 --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_index_pointing_precision.yaml @@ -0,0 +1,70 @@ +# 在标准指向任务基础上缩小目标半径上界,用于评测「精细指向 / 类抓取触点」控制难度。 +# 构建示例: 在 Python 中调用 Simulator.build("uitb/configs/mobl_arms_index_pointing_precision.yaml", output_folder=...) 
+simulator_name: "mobl_arms_index_pointing_precision" + +simulation: + + bm_model: + cls: "MoblArmsIndex" + kwargs: + shoulder_variant: "none" + effort_model: + cls: "Neural" + kwargs: + weight: 1e-4 + + task: + cls: "Pointing" + kwargs: + end_effector: ["geom", "hand_2distph"] + shoulder: ["body", "humphant"] + target_radius_limit: [0.012, 0.04] + stateful_information_encoder: + module: "rl.encoders" + cls: "Identity" + + perception_modules: + - cls: "vision.FixedEye" + kwargs: + resolution: [120, 80] + channels: [3] + pos: "0 0 1.2" + quat: "0.583833 0.399104 -0.399421 -0.583368" + encoder: + module: "rl.encoders" + cls: "SmallCNN" + kwargs: + out_features: 256 + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + encoder: + module: "rl.encoders" + cls: "OneLayer" + kwargs: + out_features: 128 + run_parameters: + action_sample_freq: 20 + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 100_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 500 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_index_remote_driving.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_index_remote_driving.yaml new file mode 100644 index 00000000..68983630 --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_index_remote_driving.yaml @@ -0,0 +1,72 @@ +simulator_name: "mobl_arms_index_remote_driving" + +simulation: + bm_model: + cls: "MoblArmsIndex" + kwargs: + shoulder_variant: "patch-v1" + effort_model: + cls: "Neural" + kwargs: + weight: 1e-4 + task: + cls: "RemoteDriving" + kwargs: + end_effector: 
"hand_2distph" + extratime_length_seconds: 0 + episode_length_seconds: 10 + car_velocity_threshold: 0.05 + reward_function: + cls: "NegativeExpDistance" + kwargs: + joystick_specs: + k: 3.0 + shift: -1.0 + scale: 1.0 + bonus_active: False + target_specs: + k: 3.0 + shift: -1.0 + scale: 0.1 + bonus_active: True + bonus: 1.0 + perception_modules: + - cls: "vision.FixedEye" + kwargs: + resolution: [120, 80] + channels: [0] + buffer: 0.1 + pos: "0 0 1.2" + quat: "0.583833 0.399104 -0.399421 -0.583368" + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + - cls: "tactile.RectangularCuboidGrid" + kwargs: + geom: "hand_2distph" + resolution: [0, 1, 2] + run_parameters: + action_sample_freq: 20 + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.9 + total_timesteps: 200_000_000 + device: "cuda" + num_workers: 20 + nsteps: 4000 + batch_size: 1000 + target_kl: 1.0 + save_freq: 5_000_000 \ No newline at end of file diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_index_tracking.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_index_tracking.yaml new file mode 100644 index 00000000..1fe51fae --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_index_tracking.yaml @@ -0,0 +1,61 @@ +simulator_name: "mobl_arms_index_tracking" + +simulation: + bm_model: + cls: "MoblArmsIndex" + kwargs: + shoulder_variant: "none" + effort_model: + cls: "Neural" + kwargs: + weight: 1e-4 + task: + cls: "Tracking" + kwargs: + end_effector: ["geom", "hand_2distph"] + shoulder: ["body", "humphant"] + episode_length_seconds: 10 + perception_modules: + - cls: "vision.FixedEye" + kwargs: + resolution: 
[120, 80] + channels: [3] + buffer: 0.1 + pos: "0 0 1.2" + quat: "0.583833 0.399104 -0.399421 -0.583368" + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 20 + callbacks: + - name: "freq_curriculum" + cls: "rl.sb3.callbacks.LinearCurriculum" + kwargs: + start_value: 0 + end_value: 1 + start_timestep: 40_000_000 + end_timestep: 80_000_000 + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 100_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 500 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_llc.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_llc.yaml new file mode 100644 index 00000000..63d7889a --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_llc.yaml @@ -0,0 +1,44 @@ +simulator_name: "mobl_arms_llc" + +simulation: + bm_model: + cls: "MoblArms" + kwargs: + shoulder_variant: "none" + effort_model: + cls: "Neural" + kwargs: + weight: 1e-4 + task: + cls: "LowLevelController" + kwargs: + joints: ["elv_angle", "shoulder_elv", "shoulder_rot", "elbow_flexion", "pro_sup"] + perception_modules: + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 20 + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: 
"schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 200_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 1000 + target_kl: 1.0 + save_freq: 5_000_000 \ No newline at end of file diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_llc_4s_50hz.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_llc_4s_50hz.yaml new file mode 100644 index 00000000..7fc8f487 --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_llc_4s_50hz.yaml @@ -0,0 +1,46 @@ +simulator_name: "mobl_arms_llc_4s_50hz" + +simulation: + bm_model: + cls: "MoblArms" + kwargs: + shoulder_variant: "none" + effort_model: + cls: "Neural" + kwargs: + weight: 1e-4 + task: + cls: "LowLevelController" + kwargs: + joints: ["elv_angle", "shoulder_elv", "shoulder_rot", "elbow_flexion", "pro_sup"] + perception_modules: + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 50 + max_trial_time: 4 + dwell_time: 0.5 + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 200_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 1000 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_llc_5s.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_llc_5s.yaml new file mode 100644 index 00000000..22dc285d --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_llc_5s.yaml @@ -0,0 +1,46 @@ +simulator_name: "mobl_arms_llc_5s" + +simulation: + bm_model: + cls: "MoblArms" + kwargs: + shoulder_variant: "none" + effort_model: + cls: 
"Neural" + kwargs: + weight: 1e-4 + task: + cls: "LowLevelController" + kwargs: + joints: ["elv_angle", "shoulder_elv", "shoulder_rot", "elbow_flexion", "pro_sup"] + perception_modules: + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 20 + max_trial_time: 5 + dwell_time: 0.5 + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 200_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 1000 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_llc_8s_50hz.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_llc_8s_50hz.yaml new file mode 100644 index 00000000..e1f3a8d6 --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_llc_8s_50hz.yaml @@ -0,0 +1,47 @@ +simulator_name: "mobl_arms_llc_8s_50hz" + +simulation: + bm_model: + cls: "MoblArms" + kwargs: + shoulder_variant: "none" + effort_model: + cls: "Neural" + kwargs: + weight: 1e-4 + + task: + cls: "LowLevelController" + kwargs: + joints: ["elv_angle", "shoulder_elv", "shoulder_rot", "elbow_flexion", "pro_sup"] + perception_modules: + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 50 + max_trial_time: 8 + dwell_time: 0.5 + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + 
function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 200_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 1000 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_llc_trackall_5s.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_llc_trackall_5s.yaml new file mode 100644 index 00000000..c43b0a49 --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_llc_trackall_5s.yaml @@ -0,0 +1,48 @@ +simulator_name: "mobl_arms_llc_trackall_5s" + +simulation: + bm_model: + cls: "MoblArms" + kwargs: + shoulder_variant: "none" + effort_model: + cls: "Neural" + kwargs: + weight: 1e-4 + task: + cls: "LowLevelController" + kwargs: + joints: ["elv_angle", "shoulder_elv", "shoulder_rot", "elbow_flexion", "pro_sup"] #deprecated + track_all_joints: True + perception_modules: + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 20 + max_trial_time: 5 + dwell_time: 0.5 + + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 200_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 1000 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_adaptive_constrained.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_adaptive_constrained.yaml new file mode 100644 index 00000000..45992bbd --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_adaptive_constrained.yaml @@ -0,0 +1,69 @@ +simulator_name: 
"whacamole_adaptive_constrained" +project: "unity" + +simulation: + bm_model: + cls: "MoblArmsWrist" + kwargs: + shoulder_variant: "none" + skull_rotation: [0.9961947, 0, 0, -0.0871557] + effort_model: + cls: "CumulativeFatigue3CCr" + kwargs: + weight: 1e-1 + task: + cls: "UnityEnv" + kwargs: + unity_executable: "apps/whac-a-mole-linux/build.x86_64" + app_args: ["-condition", "random", "-adaptive"] + standalone: true # set to False for debugging (open the app in Unity editor) + #port: 5555 # uncomment for debugging (connects to this port instead of finding a random open one) + time_scale: 5 + gear: "oculus-quest-1" + left_controller_enabled: false +# left_controller_body: "hand" +# left_controller_relpose: [-0.06694103, -0.00423279, 0.00806701, 0.79349534, 0.49238653, -0.24941408, 0.256346] + right_controller_body: "hand" + right_controller_relpose: [-0.06694103, -0.00423279, 0.00806701, 0.79349534, 0.49238653, -0.24941408, 0.256346] + headset_body: "skull" + headset_relpose: [-0.10703, 0.001594, -0.19141, 0.7071068, 0.7071068, 0, 0] + perception_modules: + - cls: "vision.UnityHeadset" + kwargs: + resolution: [120, 80] # must match dimensions used in unity + channels: [1,3] + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 20 + info_keywords: [["Points", "final"], ["RewardUnsuccessfulContact", "sum"], ["DistanceReward", "sum"], ["EffortCost", "sum"], + ["failrateTarget0", "final"], ["failrateTarget1", "final"], ["failrateTarget2", "final"], + ["failrateTarget3", "final"], ["failrateTarget4", "final"], ["failrateTarget5", "final"], + ["failrateTarget6", "final"], ["failrateTarget7", "final"], ["failrateTarget8", "final"]] + # callbacks: + # - name: "custom_train_logger" + # cls: "rl.sb3.callbacks.CustomTrainLogCallback" + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + 
net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 100_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 1000 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_adaptive_unconstrained.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_adaptive_unconstrained.yaml new file mode 100644 index 00000000..6ce781de --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_adaptive_unconstrained.yaml @@ -0,0 +1,69 @@ +simulator_name: "whacamole_adaptive_unconstrained" +project: "unity" + +simulation: + bm_model: + cls: "MoblArmsWrist" + kwargs: + shoulder_variant: "none" + skull_rotation: [0.9961947, 0, 0, -0.0871557] + effort_model: + cls: "CumulativeFatigue3CCr" + kwargs: + weight: 1e-1 + task: + cls: "UnityEnv" + kwargs: + unity_executable: "apps/whac-a-mole-linux/build.x86_64" + app_args: ["-condition", "random-unconstrained", "-adaptive"] + standalone: true # set to False for debugging (open the app in Unity editor) + #port: 5555 # uncomment for debugging (connects to this port instead of finding a random open one) + time_scale: 5 + gear: "oculus-quest-1" + left_controller_enabled: false +# left_controller_body: "hand" +# left_controller_relpose: [-0.06694103, -0.00423279, 0.00806701, 0.79349534, 0.49238653, -0.24941408, 0.256346] + right_controller_body: "hand" + right_controller_relpose: [-0.06694103, -0.00423279, 0.00806701, 0.79349534, 0.49238653, -0.24941408, 0.256346] + headset_body: "skull" + headset_relpose: [-0.10703, 0.001594, -0.19141, 0.7071068, 0.7071068, 0, 0] + perception_modules: + - cls: "vision.UnityHeadset" + kwargs: + resolution: [120, 80] # must match dimensions used in unity + channels: [1,3] + - cls: 
"proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 20 + info_keywords: [["Points", "final"], ["RewardUnsuccessfulContact", "sum"], ["DistanceReward", "sum"], ["EffortCost", "sum"], + ["failrateTarget0", "final"], ["failrateTarget1", "final"], ["failrateTarget2", "final"], + ["failrateTarget3", "final"], ["failrateTarget4", "final"], ["failrateTarget5", "final"], + ["failrateTarget6", "final"], ["failrateTarget7", "final"], ["failrateTarget8", "final"]] + # callbacks: + # - name: "custom_train_logger" + # cls: "rl.sb3.callbacks.CustomTrainLogCallback" + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 100_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 1000 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_constrained.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_constrained.yaml new file mode 100644 index 00000000..5178a321 --- /dev/null +++ b/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_constrained.yaml @@ -0,0 +1,66 @@ +simulator_name: "whacamole_constrained" +project: "unity" + +simulation: + bm_model: + cls: "MoblArmsWrist" + kwargs: + shoulder_variant: "none" + skull_rotation: [0.9961947, 0, 0, -0.0871557] + effort_model: + cls: "CumulativeFatigue3CCr" + kwargs: + weight: 1e-1 + task: + cls: "UnityEnv" + kwargs: + unity_executable: "apps/whac-a-mole-linux/build.x86_64" + app_args: ["-condition", "random"] + standalone: true # set to False for debugging (open the app in Unity editor) + #port: 5555 # uncomment for debugging 
(connects to this port instead of finding a random open one) + time_scale: 5 + gear: "oculus-quest-1" + left_controller_enabled: false +# left_controller_body: "hand" +# left_controller_relpose: [-0.06694103, -0.00423279, 0.00806701, 0.79349534, 0.49238653, -0.24941408, 0.256346] + right_controller_body: "hand" + right_controller_relpose: [-0.06694103, -0.00423279, 0.00806701, 0.79349534, 0.49238653, -0.24941408, 0.256346] + headset_body: "skull" + headset_relpose: [-0.10703, 0.001594, -0.19141, 0.7071068, 0.7071068, 0, 0] + perception_modules: + - cls: "vision.UnityHeadset" + kwargs: + resolution: [120, 80] # must match dimensions used in unity + channels: [1,3] + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 20 + info_keywords: [["Points", "final"], ["RewardUnsuccessfulContact", "sum"], ["DistanceReward", "sum"], ["EffortCost", "sum"], + ["failrateTarget0", "final"], ["failrateTarget1", "final"], ["failrateTarget2", "final"], + ["failrateTarget3", "final"], ["failrateTarget4", "final"], ["failrateTarget5", "final"], + ["failrateTarget6", "final"], ["failrateTarget7", "final"], ["failrateTarget8", "final"]] + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 100_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 1000 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_unconstrained.yaml b/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_unconstrained.yaml new file mode 100644 index 00000000..5d6ffcdd --- /dev/null +++ 
b/hci/user-in-the-box/uitb/configs/mobl_arms_whacamole_unconstrained.yaml @@ -0,0 +1,66 @@ +simulator_name: "whacamole_unconstrained" +project: "unity" + +simulation: + bm_model: + cls: "MoblArmsWrist" + kwargs: + shoulder_variant: "none" + skull_rotation: [0.9961947, 0, 0, -0.0871557] + effort_model: + cls: "CumulativeFatigue3CCr" + kwargs: + weight: 1e-1 + task: + cls: "UnityEnv" + kwargs: + unity_executable: "apps/whac-a-mole-linux/build.x86_64" + app_args: ["-condition", "random-unconstrained"] + standalone: true # set to False for debugging (open the app in Unity editor) + #port: 5555 # uncomment for debugging (connects to this port instead of finding a random open one) + time_scale: 5 + gear: "oculus-quest-1" + left_controller_enabled: false +# left_controller_body: "hand" +# left_controller_relpose: [-0.06694103, -0.00423279, 0.00806701, 0.79349534, 0.49238653, -0.24941408, 0.256346] + right_controller_body: "hand" + right_controller_relpose: [-0.06694103, -0.00423279, 0.00806701, 0.79349534, 0.49238653, -0.24941408, 0.256346] + headset_body: "skull" + headset_relpose: [-0.10703, 0.001594, -0.19141, 0.7071068, 0.7071068, 0, 0] + perception_modules: + - cls: "vision.UnityHeadset" + kwargs: + resolution: [120, 80] # must match dimensions used in unity + channels: [1,3] + - cls: "proprioception.BasicWithEndEffectorPosition" + kwargs: + end_effector: ["geom", "hand_2distph"] + run_parameters: + action_sample_freq: 20 + info_keywords: [["Points", "final"], ["RewardUnsuccessfulContact", "sum"], ["DistanceReward", "sum"], ["EffortCost", "sum"], + ["failrateTarget0", "final"], ["failrateTarget1", "final"], ["failrateTarget2", "final"], + ["failrateTarget3", "final"], ["failrateTarget4", "final"], ["failrateTarget5", "final"], + ["failrateTarget6", "final"], ["failrateTarget7", "final"], ["failrateTarget8", "final"]] + +rl: + algorithm: "PPO" + policy_type: "policies.MultiInputActorCriticPolicyTanhActions" + policy_kwargs: + activation_fn: "torch.nn.LeakyReLU" + 
net_arch: [256, 256] + log_std_init: 0.0 + features_extractor_class: "feature_extractor.FeatureExtractor" + normalize_images: False + lr: + function: "schedule.linear_schedule" + kwargs: + initial_value: 5e-5 + min_value: 1e-7 + threshold: 0.8 + total_timesteps: 100_000_000 + device: "cuda" + num_workers: 10 + nsteps: 4000 + batch_size: 1000 + target_kl: 1.0 + save_freq: 5_000_000 diff --git a/hci/user-in-the-box/uitb/simulator.py b/hci/user-in-the-box/uitb/simulator.py new file mode 100644 index 00000000..954b06b7 --- /dev/null +++ b/hci/user-in-the-box/uitb/simulator.py @@ -0,0 +1,816 @@ +import gymnasium as gym +from gymnasium import spaces +import pygame +import mujoco +import os +import numpy as np +import scipy +import matplotlib +import sys +import importlib +import shutil +import inspect +import pathlib +from datetime import datetime +import copy +from collections import defaultdict +import xml.etree.ElementTree as ET + +from stable_baselines3 import PPO #required to load a trained LLC policy in HRL approach + +from .perception.base import Perception +from .utils.rendering import Camera, Context +from .utils.functions import output_path, parent_path, is_suitable_package_name, parse_yaml, write_yaml + + +class Simulator(gym.Env): + """ + The Simulator class contains functionality to build a standalone Python package from a config file. The built package + integrates a biomechanical model, a task model, and a perception model into one simulator that implements a gym.Env + interface. + """ + + # May be useful for later, the three digit number suffix is of format X.Y.Z where X is a major version. + version = "1.1.0" + + @classmethod + def get_class(cls, *args): + """ Returns a class from given strings. The last element in args should contain the class name. """ + # TODO check for incorrect module names etc + modules = ".".join(args[:-1]) + if "." 
in args[-1]: + splitted = args[-1].split(".") + if modules == "": + modules = ".".join(splitted[:-1]) + else: + modules += "." + ".".join(splitted[:-1]) + cls_name = splitted[-1] + else: + cls_name = args[-1] + module = cls.get_module(modules) + return getattr(module, cls_name) + + @classmethod + def get_module(cls, *args): + """ Returns a module from given strings. """ + src = __name__.split(".")[0] + modules = ".".join(args) + return importlib.import_module(src + "." + modules) + + @classmethod + def build(cls, config): + """ Builds a simulator based on a config. The input 'config' may be a dict (parsed from YAML) or path to a YAML file + + Args: + config: + - A dict containing configuration information. See example configs in folder uitb/configs/ + - A path to a config file + """ + + # If config is a path to the config file, parse it first + if isinstance(config, str): + if not os.path.isfile(config): + raise FileNotFoundError(f"Given config file {config} does not exist") + config = parse_yaml(config) + + # Make sure required things are defined in config + assert "simulation" in config, "Simulation specs (simulation) must be defined in config" + assert "bm_model" in config["simulation"], "Biomechanical model (bm_model) must be defined in config" + assert "task" in config["simulation"], "task (task) must be defined in config" + + assert "run_parameters" in config["simulation"], "Run parameters (run_parameters) must be defined in config" + run_parameters = config["simulation"]["run_parameters"].copy() + assert "action_sample_freq" in run_parameters, "Action sampling frequency (action_sample_freq) must be defined " \ + "in run parameters" + + # Set simulator version + config["version"] = cls.version + + # Save generated simulators to uitb/simulators + if "simulator_folder" in config: + simulator_folder = os.path.normpath(config["simulator_folder"]) + else: + simulator_folder = os.path.join(output_path(), config["simulator_name"]) + + # If 'package_name' is not 
defined use 'simulator_name' + if "package_name" not in config: + config["package_name"] = config["simulator_name"] + if not is_suitable_package_name(config["package_name"]): + raise NameError("Package name defined in the config file (either through 'package_name' or 'simulator_name') is " + "not a suitable Python package name. Use only lower-case letters and underscores instead of " + "spaces, and the name cannot start with a number.") + + # The name used in gym has a suffix -v0 + config["gym_name"] = "uitb:" + config["package_name"] + "-v0" + + # Create a simulator in the simulator folder + cls._clone(simulator_folder, config["package_name"]) + + # Load task class + task_cls = cls.get_class("tasks", config["simulation"]["task"]["cls"]) + task_cls.clone(simulator_folder, config["package_name"], app_executable=config["simulation"]["task"].get("kwargs", {}).get("unity_executable", None)) + simulation = task_cls.initialise(config["simulation"]["task"].get("kwargs", {})) + + # Set some compiler options + # TODO: would make more sense to have a separate "environment" class / xml file that defines all these defaults, + # including e.g. cameras, lighting, etc., so that they could be easily changed. 
Task and biomechanical model would + # be integrated into that object + compiler_defaults = {"inertiafromgeom": "auto", "balanceinertia": "true", "boundmass": "0.001", + "boundinertia": "0.001", "inertiagrouprange": "0 1"} + compiler = simulation.find("compiler") + if compiler is None: + ET.SubElement(simulation, "compiler", compiler_defaults) + else: + compiler.attrib.update(compiler_defaults) + + # Load biomechanical model class + bm_cls = cls.get_class("bm_models", config["simulation"]["bm_model"]["cls"]) + bm_cls.clone(simulator_folder, config["package_name"]) + bm_cls.insert(simulation) + + # Add perception modules + for module_cfg in config["simulation"].get("perception_modules", []): + module_cls = cls.get_class("perception", module_cfg["cls"]) + module_kwargs = module_cfg.get("kwargs", {}) + module_cls.clone(simulator_folder, config["package_name"]) + module_cls.insert(simulation, **module_kwargs) + + # Clone also RL library files so the package will be completely standalone + rl_cls = cls.get_class("rl", config["rl"]["algorithm"]) + rl_cls.clone(simulator_folder, config["package_name"]) + + # TODO read the xml file directly from task.getroot() instead of writing it to a file first; need to input a dict + # of assets to mujoco.MjModel.from_xml_path + simulation_file = os.path.join(simulator_folder, config["package_name"], "simulation") + with open(simulation_file+".xml", 'w') as file: + simulation.write(file, encoding='unicode') + + # Initialise the simulator + model, _, _, _, _, _ = \ + cls._initialise(config, simulator_folder, {**run_parameters, "build": True}) + + # Now that simulator has been initialised, everything should be set. 
Now we want to save the xml file again, but + # mujoco only is able to save the latest loaded xml file (which is either the task or bm model xml files which are + # are read in their __init__ functions), hence we need to read the file we've generated again before saving the + # modified model + mujoco.MjModel.from_xml_path(simulation_file+".xml") + mujoco.mj_saveLastXML(simulation_file+".xml", model) + + # Save the modified model also as binary for faster loading + mujoco.mj_saveModel(model, simulation_file+".mjcf", None) + + # Input built time into config + config["built"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + # Save config + write_yaml(config, os.path.join(simulator_folder, "config.yaml")) + + return simulator_folder + + @classmethod + def _clone(cls, simulator_folder, package_name): + """ Create a folder for the simulator being built, and copy or create relevant files. + + Args: + simulator_folder: Location of the simulator. + package_name: Name of the simulator (which is a python package). 
+ """ + + # Create the folder + dst = os.path.join(simulator_folder, package_name) + os.makedirs(dst, exist_ok=True) + + # Copy simulator + src = pathlib.Path(inspect.getfile(cls)) + shutil.copyfile(src, os.path.join(dst, src.name)) + + # Create __init__.py with env registration + with open(os.path.join(dst, "__init__.py"), "w") as file: + file.write("from .simulator import Simulator\n\n") + file.write("from gymnasium.envs.registration import register\n") + file.write("import pathlib\n\n") + file.write("module_folder = pathlib.Path(__file__).parent\n") + file.write("simulator_folder = module_folder.parent\n") + file.write("kwargs = {'simulator_folder': simulator_folder}\n") + file.write("register(id=f'{module_folder.stem}-v0', entry_point=f'{module_folder.stem}.simulator:Simulator', kwargs=kwargs)\n") + + # Copy utils + shutil.copytree(os.path.join(parent_path(src), "utils"), os.path.join(simulator_folder, package_name, "utils"), + dirs_exist_ok=True) + # Copy train + shutil.copytree(os.path.join(parent_path(src), "train"), os.path.join(simulator_folder, package_name, "train"), + dirs_exist_ok=True) + # Copy test + shutil.copytree(os.path.join(parent_path(src), "test"), os.path.join(simulator_folder, package_name, "test"), + dirs_exist_ok=True) + + @classmethod + def _initialise(cls, config, simulator_folder, run_parameters): + """ Initialise a simulator -- i.e., create a MjModel, MjData, and initialise all necessary variables. + + Args: + config: A config dict. + simulator_folder: Location of the simulator. + run_parameters: Important run time variables that may also be used to override parameters. 
+ """ + + # Get task class and kwargs + task_cls = cls.get_class("tasks", config["simulation"]["task"]["cls"]) + task_kwargs = config["simulation"]["task"].get("kwargs", {}) + + # Get bm class and kwargs + bm_cls = cls.get_class("bm_models", config["simulation"]["bm_model"]["cls"]) + bm_kwargs = config["simulation"]["bm_model"].get("kwargs", {}) + + # Initialise perception modules + perception_modules = {} + for module_cfg in config["simulation"].get("perception_modules", []): + module_cls = cls.get_class("perception", module_cfg["cls"]) + module_kwargs = module_cfg.get("kwargs", {}) + perception_modules[module_cls] = module_kwargs + + # Get simulation file + simulation_file = os.path.join(simulator_folder, config["package_name"], "simulation") + + # Load the mujoco model; try first with the binary model (faster, contains some parameters that may be lost when + # re-saving xml files like body mass). For some reason the binary model fails to load in some situations (like + # when the simulator has been built on a different computer) + # TODO loading from binary disabled, weird problems (like a body not found from model when loaded from binary, but + # found correctly when model loaded from xml) + # try: + # model = mujoco.MjModel.from_binary_path(simulation_file + ".mjcf") + # except: # TODO what was the exception type + model = mujoco.MjModel.from_xml_path(simulation_file + ".xml") + + # Initialise MjData + data = mujoco.MjData(model) + + # Add frame skip and dt to run parameters + run_parameters["frame_skip"] = int(1 / (model.opt.timestep * run_parameters["action_sample_freq"])) + run_parameters["dt"] = model.opt.timestep*run_parameters["frame_skip"] + + # Initialise a rendering context, required for e.g. 
some vision modules + run_parameters["rendering_context"] = Context(model, + max_resolution=run_parameters.get("max_resolution", [1280, 960])) + + # Initialise callbacks + callbacks = {} + for cb in run_parameters.get("callbacks", []): + callbacks[cb["name"]] = cls.get_class(cb["cls"])(cb["name"], **cb["kwargs"]) + + # Now initialise the actual classes; model and data are input to the inits so that stuff can be modified if needed + # (e.g. move target to a specific position wrt to a body part) + task = task_cls(model, data, **{**task_kwargs, **callbacks, **run_parameters}) + bm_model = bm_cls(model, data, **{**bm_kwargs, **callbacks, **run_parameters}) + perception = Perception(model, data, bm_model, perception_modules, {**callbacks, **run_parameters}) + + return model, data, task, bm_model, perception, callbacks + + @classmethod + def get(cls, simulator_folder, render_mode="rgb_array", render_mode_perception="embed", render_show_depths=False, run_parameters=None, use_cloned=True): + """ Returns a Simulator that is located in given folder. + + Args: + simulator_folder: Location of the simulator. + render_mode: Whether render() will return a single rgb array (render_mode="rgb_array"), + a list of rgb arrays (render_mode="rgb_array_list"; + adapted from https://github.com/openai/gym/blob/master/gym/wrappers/render_collection.py), + or None while the frames in a separate PyGame window are updated directly when calling + step() or reset() (render_mode="human"; + adapted from https://github.com/openai/gym/blob/master/gym/wrappers/human_rendering.py)). + render_mode_perception: Whether images of visual perception modules should be directly embedded into main camera view ("embed"), stored as separate videos ("separate"), or not used at all [which allows to watch vision in Unity Editor if debug mode is enabled/standalone app is disabled] (None) + render_show_depths: Whether depth images of visual perception modules should be included in rendering. 
+ run_parameters: Can be used to override parameters during run time. + use_cloned: Can be useful for debugging. Set to False to use original files instead of the ones that have been + cloned/copied during building phase. + """ + + # Read config file + config_file = os.path.join(simulator_folder, "config.yaml") + try: + config = parse_yaml(config_file) + except: + raise FileNotFoundError(f"Could not open file {config_file}") + + # Make sure the simulator has been built + if "built" not in config: + raise RuntimeError("Simulator has not been built") + + # Make sure simulator_folder is in path (used to import gen_cls_cloned) + if simulator_folder not in sys.path: + sys.path.insert(0, simulator_folder) + + # Get Simulator class + gen_cls_cloned = getattr(importlib.import_module(config["package_name"]), "Simulator") + if hasattr(gen_cls_cloned, "version"): + _legacy_mode = False + gen_cls_cloned_version = gen_cls_cloned.version.split("-v")[-1] + else: + _legacy_mode = True + gen_cls_cloned_version = gen_cls_cloned.id.split("-v")[-1] #deprecated + if use_cloned: + gen_cls = gen_cls_cloned + else: + gen_cls = cls + gen_cls_version = gen_cls.version.split("-v")[-1] + + if gen_cls_version.split(".")[0] > gen_cls_cloned_version.split(".")[0]: + raise RuntimeError( + f"""Severe version mismatch. The simulator '{config["simulator_name"]}' has version {gen_cls_cloned_version}, while your uitb package has version {gen_cls_version}.\nTo run with version {gen_cls_cloned_version}, set 'use_cloned=True'.""") + elif gen_cls_version.split(".")[1] > gen_cls_cloned_version.split(".")[1]: + print( + f"""WARNING: Version mismatch. 
The simulator '{config["simulator_name"]}' has version {gen_cls_cloned_version}, while your uitb package has version {gen_cls_version}.\nTo run with version {gen_cls_version}, set 'use_cloned=True'.""") + + if _legacy_mode: + _simulator = gen_cls(simulator_folder, run_parameters=run_parameters) + else: + try: + _simulator = gen_cls(simulator_folder, render_mode=render_mode, render_mode_perception=render_mode_perception, render_show_depths=render_show_depths, + run_parameters=run_parameters) + except TypeError: + _simulator = gen_cls(simulator_folder, render_mode=render_mode, render_show_depths=render_show_depths, + run_parameters=run_parameters) + + # Return Simulator object + return _simulator + + def __init__(self, simulator_folder, render_mode="rgb_array", render_mode_perception="embed", render_show_depths=False, run_parameters=None): + """ Initialise a new `Simulator`. + + Args: + simulator_folder: Location of a simulator. + render_mode: Whether render() will return a single rgb array (render_mode="rgb_array"), + a list of rgb arrays (render_mode="rgb_array_list"; + adapted from https://github.com/openai/gym/blob/master/gym/wrappers/render_collection.py), + or None while the frames in a separate PyGame window are updated directly when calling + step() or reset() (render_mode="human"; + adapted from https://github.com/openai/gym/blob/master/gym/wrappers/human_rendering.py)). + render_mode_perception: Whether images of visual perception modules should be directly embedded into main camera view ("embed"), stored as separate videos ("separate"), or not used at all [which allows to watch vision in Unity Editor if debug mode is enabled/standalone app is disabled] (None) + render_show_depths: Whether depth images of visual perception modules should be included in rendering. + run_parameters: Can be used to override parameters during run time. 
+ """ + + # Make sure simulator exists + if not os.path.exists(simulator_folder): + raise FileNotFoundError(f"Simulator folder {simulator_folder} does not exists") + self._simulator_folder = simulator_folder + + # Read config + self._config = parse_yaml(os.path.join(self._simulator_folder, "config.yaml")) + + # Get run parameters: these parameters can be used to override parameters used during training + self._run_parameters = self._config["simulation"]["run_parameters"].copy() + self._run_parameters.update(run_parameters or {}) + + # Initialise simulation + self._model, self._data, self.task, self.bm_model, self.perception, self.callbacks = \ + self._initialise(self._config, self._simulator_folder, self._run_parameters) + + # Set action space TODO for now we assume all actuators have control signals between [-1, 1] + self.action_space = self._initialise_action_space() + + # Set observation space + self.observation_space = self._initialise_observation_space() + + # Collect some episode statistics + self._episode_statistics = {"length (seconds)": 0, "length (steps)": 0, "reward": 0} + + # Initialise viewer + self._GUI_camera = Camera(self._run_parameters["rendering_context"], self._model, self._data, camera_id='for_testing', + dt=self._run_parameters["dt"]) + + self._render_mode = render_mode + self._render_mode_perception = render_mode_perception #whether perception camera views should be directly embedded into camera view of camera_id ("embed"), stored in self._render_stack_perception ("separate"), or not used at all "separate"), or not used at all [which allows to watch vision in Unity Editor if debug mode is enabled/standalone app is disabled] (None) + self._render_stack = [] #only used if render_mode == "rgb_array_list" + self._render_stack_perception = defaultdict(list) #only used if render_mode == "rgb_array_list" and self._render_mode_perception == "separate" + self._render_stack_pop = True #If True, clear the render stack after .render() is called. 
+ self._render_stack_clean_at_reset = True #If True, clear the render stack when .reset() is called. + self._render_show_depths = render_show_depths #If True, depth images of visual perception modules are included in GUI rendering. + self._render_screen_size = None #only used if render_mode == "human" + self._render_window = None #only used if render_mode == "human" + self._render_clock = None #only used if render_mode == "human" + + if 'llc' in self.config: #if HRL approach is used + llc_simulator_folder = os.path.join(output_path(), self.config["llc"]["simulator_name"]) + if llc_simulator_folder not in sys.path: + sys.path.insert(0, llc_simulator_folder) + if not os.path.exists(llc_simulator_folder): + raise FileNotFoundError(f"Simulator folder {llc_simulator_folder} does not exists") + llccheckpoint_dir = os.path.join(llc_simulator_folder, 'checkpoints') + # Load policy TODO should create a load method for uitb.rl.BaseRLModel + print(f'Loading model: {os.path.join(llccheckpoint_dir, self.config["llc"]["checkpoint"])}\n') + self.llc_model = PPO.load(os.path.join(llccheckpoint_dir, self.config["llc"]["checkpoint"])) + self.action_space = self._initialise_HRL_action_space() + self._max_steps = self.config["llc"]["llc_ratio"] + self._dwell_threshold = int(0.5*self._max_steps) + self._target_radius = 0.05 + self._independent_dofs = [] + self._independent_joints = [] + joints = self.config["llc"]["joints"] + for joint in joints: + joint_id = mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_JOINT, joint) + if self._model.jnt_type[joint_id] not in [mujoco.mjtJoint.mjJNT_HINGE, mujoco.mjtJoint.mjJNT_SLIDE]: + raise NotImplementedError(f"Only 'hinge' and 'slide' joints are supported, joint " + f"{joint} is of type {mujoco.mjtJoint(self._model.jnt_type[joint_id]).name}") + self._independent_dofs.append(self._model.jnt_qposadr[joint_id]) + self._independent_joints.append(joint_id) + self._jnt_range = self._model.jnt_range[self._independent_joints] + + + #To normalize 
joint ranges for llc + def _normalise_qpos(self, qpos): + # Normalise to [0, 1] + qpos = (qpos - self._jnt_range[:, 0]) / (self._jnt_range[:, 1] - self._jnt_range[:, 0]) + # Normalise to [-1, 1] + qpos = (qpos - 0.5) * 2 + return qpos + + def _initialise_action_space(self): + """ Initialise action space. """ + num_actuators = self.bm_model.nu + self.perception.nu + actuator_limits = np.ones((num_actuators,2)) * np.array([-1.0, 1.0]) + return spaces.Box(low=np.float32(actuator_limits[:, 0]), high=np.float32(actuator_limits[:, 1])) + + def _initialise_HRL_action_space(self): + bm_nu = self.bm_model.nu + bm_jnt_range = np.ones((bm_nu,2)) * np.array([-1.0, 1.0]) + perception_nu = self.perception.nu + perception_jnt_range = np.ones((perception_nu,2)) * np.array([-1.0, 1.0]) + jnt_range = np.concatenate((bm_jnt_range, perception_jnt_range), axis=0) + action_space = gym.spaces.Box(low=jnt_range[:,0], high=jnt_range[:,1]) + return action_space + + def _initialise_observation_space(self): + """ Initialise observation space. """ + observation = self.get_observation() + obs_dict = dict() + for module in self.perception.perception_modules: + obs_dict[module.modality] = spaces.Box(dtype=np.float32, **module.get_observation_space_params()) + if "stateful_information" in observation: + obs_dict["stateful_information"] = spaces.Box(dtype=np.float32, + **self.task.get_stateful_information_space_params()) + return spaces.Dict(obs_dict) + + def _get_qpos(self, model, data): + qpos = data.qpos[self._independent_dofs].copy() + qpos = self._normalise_qpos(qpos) + return qpos + + def step(self, action): + """ Step simulation forward with given actions. + + Args: + action: Actions sampled from a policy. Limited to range [-1, 1]. 
+ """ + if 'llc' in self.config: #if HRL approach is used + self.task._target_qpos = action # action to pass to LLC + self._steps = 0 # Initialise loop control to 0 + #acc_reward = 0 #To be used when rewards are being accumulated in llc steps + + while self._steps < self._max_steps: # loop for llc controls based on llc_ratio + + llc_action, _states = self.llc_model.predict(self.get_llcobservation(action), deterministic=True) # Get BM action from LLC + # Set control for the bm model + self.bm_model.set_ctrl(self._model, self._data, llc_action) + + # Set control for perception modules (e.g. eye movements) + self.perception.set_ctrl(self._model, self._data, action[self.bm_model.nu:]) + + # Advance the simulation + mujoco.mj_step(self._model, self._data, nstep=int(self._run_parameters["frame_skip"])) # Number of timesteps to skip for LLC + + # Update bm model (e.g. update constraints); updates also effort model + self.bm_model.update(self._model, self._data) + + # Update perception modules + self.perception.update(self._model, self._data) + + dist = np.abs(action - self._get_qpos(self._model, self._data)) + + # Update environment + reward, terminated, truncated, info = self.task.update(self._model, self._data) + + # Add an effort cost to reward + reward -= self.bm_model.get_effort_cost(self._model, self._data) + + #acc_reward += reward #To be used when rewards are being accumulated in llc steps + + # Get observation + obs = self.get_observation() + + # Add frame to stack + if self._render_mode == "rgb_array_list": + self._render_stack.append(self._GUI_rendering()) + elif self._render_mode == "human": + self._GUI_rendering_pygame() + + if truncated or terminated: + break + + # Pointing + if "target_spawned" in info: + if info["target_spawned"] or info["target_hit"]: + break + + # Choice Reaction + elif "new_button_generated" in info: + if info["new_button_generated"] or info["target_hit"]: + break + + self._steps += 1 + if np.all(dist < self._target_radius): + break + + 
      # Return the final transition produced by the LLC inner loop above.
      # NOTE(review): if the while-loop never executed (self._max_steps == 0),
      # `obs`/`reward`/`terminated`/`truncated`/`info` would be unbound here --
      # presumably _max_steps >= 1; confirm.
      return obs, reward, terminated, truncated, info

    else:
      # Standard (non-hierarchical) control path: `action` directly drives the
      # biomechanical model actuators and the perception modules.

      # Set control for the bm model
      self.bm_model.set_ctrl(self._model, self._data, action[:self.bm_model.nu])

      # Set control for perception modules (e.g. eye movements)
      self.perception.set_ctrl(self._model, self._data, action[self.bm_model.nu:])

      # Advance the simulation
      # NOTE(review): unlike the LLC branch above, frame_skip is not cast to int here -- confirm it is already an int.
      mujoco.mj_step(self._model, self._data, nstep=self._run_parameters["frame_skip"])

      # Update bm model (e.g. update constraints); updates also effort model
      self.bm_model.update(self._model, self._data)

      # Update perception modules
      self.perception.update(self._model, self._data)

      # Update environment
      reward, terminated, truncated, info = self.task.update(self._model, self._data)

      # Add an effort cost to reward
      effort_cost = self.bm_model.get_effort_cost(self._model, self._data)
      info["EffortCost"] = effort_cost
      reward -= effort_cost

      # Get observation
      obs = self.get_observation(info)

      # Add frame to stack
      if self._render_mode == "rgb_array_list":
        self._render_stack.append(self._GUI_rendering())
      elif self._render_mode == "human":
        self._GUI_rendering_pygame()

      return obs, reward, terminated, truncated, info


  def get_observation(self, info=None):
    """ Returns an observation from the perception model.

    Args:
      info: Optional dict of task info, passed through to the perception modules.

    Returns:
      A dict with observations from individual perception modules. May also contain stateful information from a task.
    """

    # Get observation from perception
    observation = self.perception.get_observation(self._model, self._data, info)

    # Add any stateful information that is required
    # NOTE(review): assumes the task returns a numpy array (reads .size) -- confirm.
    stateful_information = self.task.get_stateful_information(self._model, self._data)
    if stateful_information.size > 0: #TODO: define stateful_information (and encoder) that can be used as default, if no stateful information is provided (zero-size arrays do not work with sb3 currently...)
      observation["stateful_information"] = stateful_information

    return observation

  def get_llcobservation(self, action):
    """ Returns the observation fed to the low-level controller (LLC).

    The vision modality is removed, and the difference between the HLC target joint
    positions (`action`) and the current joint positions is added as stateful information.

    Args:
      action: Target joint positions chosen by the high-level controller.

    Returns:
      A dict with observations from individual perception modules (minus vision),
      plus the qpos difference under key "stateful_information".
    """

    # Get observation from perception
    observation = self.perception.get_observation(self._model, self._data)

    # Remove Vision for LLC
    # NOTE(review): raises KeyError if no "vision" module is present -- presumably
    # HRL configs always include one; confirm.
    observation.pop("vision")
    qpos = self._get_qpos(self._model, self._data)
    qpos_diff = action - qpos

    # Stateful Information for LLC policy
    # NOTE(review): `stateful_information` is always non-None here, so the guard
    # below is effectively always taken.
    stateful_information = qpos_diff
    if stateful_information is not None:
      observation["stateful_information"] = stateful_information

    return observation


  def reset(self, seed=None):
    """ Reset the simulator and return an observation.

    Args:
      seed: Optional RNG seed, forwarded to the parent environment.

    Returns:
      A tuple (observation, info) as per the gymnasium reset API.
    """

    super().reset(seed=seed)

    # Reset sim
    mujoco.mj_resetData(self._model, self._data)

    # Reset all models
    self.bm_model.reset(self._model, self._data)
    self.perception.reset(self._model, self._data)
    info = self.task.reset(self._model, self._data)

    # Do a forward so everything will be set
    mujoco.mj_forward(self._model, self._data)

    if self._render_mode == "rgb_array_list":
      if self._render_stack_clean_at_reset:
        self._render_stack = []
        self._render_stack_perception = defaultdict(list)
      self._render_stack.append(self._GUI_rendering())
    elif self._render_mode == "human":
      self._GUI_rendering_pygame()

    return self.get_observation(), info

  def render(self):
    """ Return collected frames (rgb_array_list), a single frame (rgb_array), or None. """
    if self._render_mode == "rgb_array_list":
      render_stack = self._render_stack
      if self._render_stack_pop:
        # Hand ownership of the collected frames to the caller
        self._render_stack = []
      return render_stack
    elif self._render_mode == "rgb_array":
      return self._GUI_rendering()
    else:
      # "human" mode renders on-screen during step()/reset(); nothing to return
      return None

  def get_render_stack_perception(self):
    """ Return frames collected from perception-module cameras (render mode "separate"). """
    render_stack_perception = self._render_stack_perception
    # if self._render_stack_pop:
    #   self._render_stack_perception = defaultdict(list)
    return render_stack_perception

  def _GUI_rendering(self):
    # Grab an image from the 'for_testing' camera and grab all GUI-prepared images from included visual perception modules, and display them 'picture-in-picture'

    # Grab images
    img, _ = self._GUI_camera.render()

    if self._render_mode_perception == "embed":
      # Embed perception camera images into main camera image

      # perception_camera_images = [rgb_or_depth_array for camera in self.perception.cameras
      #                             for rgb_or_depth_array in camera.render() if rgb_or_depth_array is not None]
      perception_camera_images = self.perception.get_renders()

      # TODO: add text annotations to perception camera images
      if len(perception_camera_images) > 0:
        _img_size = img.shape[:2]  #(height, width)

        # Vertical alignment of perception camera images, from bottom right to top right
        ## TODO: allow for different inset locations
        _desired_subwindow_height = np.round(_img_size[0] / len(perception_camera_images)).astype(int)
        _maximum_subwindow_width = np.round(0.2 * _img_size[1]).astype(int)

        perception_camera_images_resampled = []
        for ocular_img in perception_camera_images:
          # Convert 2D depth arrays to 3D heatmap arrays
          if ocular_img.ndim == 2:
            if self._render_show_depths:
              # NOTE(review): renders the depth map through a throwaway matplotlib
              # figure to obtain a jet-colormapped RGB image; the figure is closed
              # immediately afterwards.
              ocular_img = matplotlib.pyplot.imshow(ocular_img, cmap=matplotlib.pyplot.cm.jet, interpolation='bicubic').make_image('TkAgg', unsampled=True)[0][..., :3]
              matplotlib.pyplot.close() #delete image
            else:
              continue

          # Scale the inset to fit both the per-image height budget and the max width
          resample_factor = min(_desired_subwindow_height / ocular_img.shape[0], _maximum_subwindow_width / ocular_img.shape[1])

          resample_height = np.round(ocular_img.shape[0] * resample_factor).astype(int)
          resample_width = np.round(ocular_img.shape[1] * resample_factor).astype(int)
          resampled_img = np.zeros((resample_height, resample_width, ocular_img.shape[2]), dtype=np.uint8)
          for channel in range(ocular_img.shape[2]):
            resampled_img[:, :, channel] = scipy.ndimage.zoom(ocular_img[:, :, channel], resample_factor,
                                                              order=0)

          perception_camera_images_resampled.append(resampled_img)

        # Stack the insets along the right edge, from bottom-right upwards
        ocular_img_bottom = _img_size[0]
        for ocular_img_idx, ocular_img in enumerate(perception_camera_images_resampled):
          #print(f"Modify ({ocular_img_bottom - ocular_img.shape[0]}, { _img_size[1] - ocular_img.shape[1]})-({ocular_img_bottom}, {img.shape[1]}).")
          img[ocular_img_bottom - ocular_img.shape[0]:ocular_img_bottom, _img_size[1] - ocular_img.shape[1]:] = ocular_img
          ocular_img_bottom -= ocular_img.shape[0]
        # input((len(perception_camera_images_resampled), perception_camera_images_resampled[0].shape, img.shape))
    elif self._render_mode_perception == "separate":
      # Collect perception camera frames into per-camera stacks instead of embedding them
      for module, camera_list in self.perception.cameras_dict.items():
        for camera in camera_list:
          for rgb_or_depth_array in camera.render():
            if rgb_or_depth_array is not None:
              self._render_stack_perception[f"{module.modality}/{type(camera).__name__}"].append(rgb_or_depth_array)

    return img

  def _GUI_rendering_pygame(self):
    """ Render the current frame into an on-screen pygame window (render mode "human"). """
    # pygame surfaces are (width, height), so transpose the (height, width, 3) array
    rgb_array = np.transpose(self._GUI_rendering(), axes=(1, 0, 2))

    if self._render_screen_size is None:
      # Lock the window size to the first rendered frame
      self._render_screen_size = rgb_array.shape[:2]

    assert self._render_screen_size == rgb_array.shape[:2], f"Expected an rgb array of shape {self._render_screen_size} from self._GUI_camera, but received an rgb array of shape {rgb_array.shape[:2]}. "

    if self._render_window is None:
      # Lazily create the window on first use
      pygame.init()
      pygame.display.init()
      self._render_window = pygame.display.set_mode(self._render_screen_size)

    if self._render_clock is None:
      self._render_clock = pygame.time.Clock()

    surf = pygame.surfarray.make_surface(rgb_array)
    self._render_window.blit(surf, (0, 0))
    pygame.event.pump()
    # Throttle to the GUI camera's frame rate
    self._render_clock.tick(self.fps)
    pygame.display.flip()

  def close(self):
    """ Close the rendering window (if self._render_mode == 'human')."""
    # NOTE(review): this method is shadowed by the second `def close(self, **kwargs)`
    # defined at the end of this class (Python keeps only the last definition made in
    # a class body), so this cleanup never runs -- the two definitions should be merged.
    super().close()
    if self._render_window is not None:
      import pygame

      pygame.display.quit()
      pygame.quit()

  @property
  def fps(self):
    # Frames per second of the GUI camera (note: reads a private attribute of the camera)
    return self._GUI_camera._fps

  def callback(self, callback_name, num_timesteps):
    """ Update a callback -- may be useful during training, e.g. for curriculum learning. """
    self.callbacks[callback_name].update(num_timesteps)

  def update_callbacks(self, num_timesteps):
    """ Update all callbacks. """
    for callback_name in self.callbacks:
      self.callback(callback_name, num_timesteps)

  # def get_logdict_keys(self):
  #   return list(self.task._info["log_dict"].keys())

  # def get_logdict_value(self, key):
  #   return self.task._info["log_dict"].get(key)

  @property
  def config(self):
    """ Return config. """
    # Deep copy so callers cannot mutate the simulator's config
    return copy.deepcopy(self._config)

  @property
  def run_parameters(self):
    """ Return run parameters. """
    # Context cannot be deep copied
    exclude = {"rendering_context"}
    run_params = {k: copy.deepcopy(self._run_parameters[k]) for k in self._run_parameters.keys() - exclude}
    run_params["rendering_context"] = self._run_parameters["rendering_context"]
    return run_params

  @property
  def simulator_folder(self):
    """ Return simulator folder. """
    return self._simulator_folder

  @property
  def render_mode(self):
    """ Return render mode. """
    return self._render_mode

  def get_state(self):
    """ Return a state of the simulator / individual components (biomechanical model, perception model, task).
+ + This function is used for logging/evaluation purposes, not for RL training. + + Returns: + A dict with one float or numpy vector per keyword. + """ + + # Get time, qpos, qvel, qacc, act_force, act, ctrl of the current simulation + state = {"timestep": self._data.time, + "qpos": self._data.qpos.copy(), + "qvel": self._data.qvel.copy(), + "qacc": self._data.qacc.copy(), + "act_force": self._data.actuator_force.copy(), + "act": self._data.act.copy(), + "ctrl": self._data.ctrl.copy()} + + # Add state from the task + state.update(self.task.get_state(self._model, self._data)) + + # Add state from the biomechanical model + state.update(self.bm_model.get_state(self._model, self._data)) + + # Add state from the perception model + state.update(self.perception.get_state(self._model, self._data)) + + return state + + def close(self, **kwargs): + """ Perform any necessary clean up. + + This function is inherited from gym.Env. It should be automatically called when this object is garbage collected + or the program exists, but that doesn't seem to be the case. This function will be called if this object has been + initialised in the context manager fashion (i.e. using the 'with' statement). """ + self.task.close(**kwargs) + self.perception.close(**kwargs) + self.bm_model.close(**kwargs)