DexForce · yhnsu · Feb 6, 2026 · Feb 6, 2026 · Feb 6, 2026 · Feb 27, 2026
diff --git a/configs/agents/rl/push_cube/train_config.json b/configs/agents/rl/push_cube/train_config.json
@@ -9,14 +9,15 @@
         "gpu_id": 0,
         "num_envs": 64,
         "iterations": 1000,
-        "rollout_steps": 1024,
+        "buffer_size": 1024,
         "enable_eval": true,
         "num_eval_envs": 16,
         "num_eval_episodes": 3,
-        "eval_freq": 2,
+        "eval_freq": 200,
         "save_freq": 200,
         "use_wandb": false,
         "wandb_project_name": "embodychain-push_cube",
+        "model_type": "standard",
         "events": {
             "eval": {
                 "record_camera": {
@@ -38,6 +39,7 @@
     },
     "policy": {
         "name": "actor_critic",
+        "action_dim": 8,
         "actor": {
             "type": "mlp",
             "network_cfg": {

diff --git a/configs/agents/rl/stack_bowls/train_config_grpo.json b/configs/agents/rl/stack_bowls/train_config_grpo.json
@@ -0,0 +1,61 @@
+{
+    "trainer": {
+        "exp_name": "stack_bowls_vla_grpo",
+        "gym_config": "configs/gym/stack_bowls/gym_config.json",
+        "seed": 42,
+        "device": "cuda:0",
+        "headless": true,
+        "enable_rt": false,
+        "gpu_id": 0,
+        "num_envs": 8,
+        "iterations": 1000,
+        "buffer_size": 64,
+        "enable_eval": false,
+        "eval_freq": 0,
+        "save_freq": 100,
+        "use_wandb": false,
+        "wandb_project_name": "embodychain-stack_bowls",
+        "import_modules": [
+            "dexechain.lab.gym.envs.tasks.tableware.stack_bowls_v1.stack_bowls"
+        ],
+        "model_type": "standard"
+    },
+    "policy": {
+        "name": "vla",
+        "action_dim": 14,
+        "vla_config": {
+            "model_path": "/root/workspace/output/stack_bowls/checkpoint-19000",
+            "instruction": "Stack the bowls.",
+            "inference_horizon": 32,
+            "action_std_init": 0.01,
+            "robot_type": "CobotMagic",
+            "gripper_open_value": 0.05,
+            "gripper_closed_value": 0.0,
+            "action_key_order": [
+                "left_armdelta_qpos",
+                "left_eefgripper",
+                "right_armdelta_qpos",
+                "right_eefgripper"
+            ],
+            "model_config": {
+                "torch_dtype": "float32"
+            }
+        }
+    },
+    "algorithm": {
+        "name": "grpo",
+        "cfg": {
+            "learning_rate": 1e-5,
+            "n_epochs": 4,
+            "batch_size": 256,
+            "gamma": 0.99,
+            "clip_coef": 0.2,
+            "ent_coef": 0.001,
+            "kl_coef": 0.02,
+            "group_size": 4,
+            "eps": 1e-8,
+            "max_grad_norm": 1.0,
+            "truncate_at_first_done": true
+        }
+    }
+}
diff --git a/configs/agents/rl/vla_example/train_config.json b/configs/agents/rl/vla_example/train_config.json
@@ -0,0 +1,70 @@
+{
+    "trainer": {
+        "exp_name": "vla_fine_tuning_ppo",
+        "gym_config": "configs/agents/rl/push_cube/gym_config.json",
+        "seed": 42,
+        "device": "cuda:0",
+        "headless": true,
+        "enable_rt": false,
+        "gpu_id": 0,
+        "num_envs": 32,
+        "iterations": 500,
+        "buffer_size": 2048,
+        "enable_eval": true,
+        "num_eval_envs": 8,
+        "num_eval_episodes": 3,
+        "eval_freq": 100,
+        "save_freq": 100,
+        "use_wandb": true,
+        "wandb_project_name": "embodychain-vla-training",
+        "model_type": "vla",
+        "events": {
+            "eval": {
+                "record_camera": {
+                    "func": "record_camera_data_async",
+                    "mode": "interval",
+                    "interval_step": 1,
+                    "params": {
+                        "name": "main_cam",
+                        "resolution": [640, 480],
+                        "eye": [-1.4, 1.4, 2.0],
+                        "target": [0, 0, 0],
+                        "up": [0, 0, 1],
+                        "intrinsics": [600, 600, 320, 240],
+                        "save_path": "./outputs/videos/vla_eval"
+                    }
+                }
+            }
+        }
+    },
+    "policy": {
+        "name": "vla",
+        "action_dim": 7,
+        "vla_config": {
+            "model_path": "checkpoints/pretrained_vla_model.pth",
+            "model_class": "vla_models.GPTVLAModel",
+            "model_config": {
+                "vision_encoder": "resnet50",
+                "language_model": "gpt2-medium",
+                "action_head_hidden_size": 512,
+                "freeze_vision_encoder": false,
+                "freeze_language_model": false
+            }
+        }
+    },
+    "algorithm": {
+        "name": "ppo",
+        "cfg": {
+            "learning_rate": 1e-5,
+            "n_epochs": 4,
+            "batch_size": 2048,
+            "gamma": 0.99,
+            "gae_lambda": 0.95,
+            "rollout_time_first": false,
+            "clip_coef": 0.2,
+            "ent_coef": 0.001,
+            "vf_coef": 0.5,
+            "max_grad_norm": 1.0
+        }
+    }
+}