From 07394632c70edaf5b9cfb2143e55fc35552eff29 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 23 May 2025 23:32:41 -0400
Subject: [PATCH 01/82] rebasing improved boids to new 4.0

---
 config/boids.ini               |  15 ++--
 ocean/boids/binding.c          |   6 +-
 ocean/boids/boids.h            |  22 +++---
 pufferlib/ocean/boids/boids.py | 126 +++++++++++++++++++++++++++++++++
 4 files changed, 150 insertions(+), 19 deletions(-)
 create mode 100644 pufferlib/ocean/boids/boids.py

diff --git a/config/boids.ini b/config/boids.ini
index d685a42c6b..0e6c8d33e6 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -1,15 +1,20 @@
 [base]
-env_name = boids
+package = ocean
+env_name = puffer_boids 
+; policy_name = Boids
+policy_name = Policy
+rnn_name = Recurrent
+; rnn_name = None
 
 [env]
 num_envs = 64
 num_boids = 64
 ; num_envs = 1
 ; num_boids = 1
-margin_turn_factor = 0.0
-centering_factor = 0.00
-avoid_factor = 1.00
-matching_factor = 1.00
+margin_turn_factor = 1.0
+cohesion_factor = 0.0
+seperation_factor = 0.0
+alignment_factor = 0.0
 
 [vec]
 num_workers = 2
diff --git a/ocean/boids/binding.c b/ocean/boids/binding.c
index a3483d6520..27b40defb4 100644
--- a/ocean/boids/binding.c
+++ b/ocean/boids/binding.c
@@ -7,9 +7,9 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
     env->num_boids = unpack(kwargs, "num_boids");
     env->report_interval = unpack(kwargs, "report_interval");
     env->margin_turn_factor = unpack(kwargs, "margin_turn_factor");
-    env->centering_factor = unpack(kwargs, "centering_factor");
-    env->avoid_factor = unpack(kwargs, "avoid_factor");
-    env->matching_factor = unpack(kwargs, "matching_factor");
+    env->cohesion_factor = unpack(kwargs, "cohesion_factor");
+    env->seperation_factor = unpack(kwargs, "seperation_factor");
+    env->alignment_factor = unpack(kwargs, "alignment_factor");
     init(env);
     return 0;
 }
diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index bf2bf6331e..5828b57169 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -52,9 +52,9 @@ typedef struct {
     Boid* boids;
     unsigned int num_boids;
     float margin_turn_factor;
-    float centering_factor;
-    float avoid_factor;
-    float matching_factor;
+    float cohesion_factor;
+    float seperation_factor;
+    float alignment_factor;
     unsigned tick;
     Log log;
     Log* boid_logs;
@@ -134,8 +134,8 @@ void c_step(Boids *env) {
             current_boid->velocity.x = flclip(current_boid->velocity.x + (mouse_x - current_boid->x), -VELOCITY_CAP, VELOCITY_CAP);
             current_boid->velocity.y = flclip(current_boid->velocity.y + (mouse_y - current_boid->y), -VELOCITY_CAP, VELOCITY_CAP);
         } else {
-            current_boid->velocity.x = flclip(current_boid->velocity.x + 2*env->actions[current_indx * 2 + 0], -VELOCITY_CAP, VELOCITY_CAP);
-            current_boid->velocity.y = flclip(current_boid->velocity.y + 2*env->actions[current_indx * 2 + 1], -VELOCITY_CAP, VELOCITY_CAP);
+            current_boid->velocity.x = flclip(current_boid->velocity.x + env->actions[current_indx * 2 + 0], -VELOCITY_CAP, VELOCITY_CAP);
+            current_boid->velocity.y = flclip(current_boid->velocity.y + env->actions[current_indx * 2 + 1], -VELOCITY_CAP, VELOCITY_CAP);
         }
         current_boid->x = flclip(current_boid->x + current_boid->velocity.x, 0, WIDTH  - BOID_WIDTH);
         current_boid->y = flclip(current_boid->y + current_boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);
@@ -161,8 +161,8 @@ void c_step(Boids *env) {
             }
         }
         if (protected_count > 0) {
-            //current_boid_reward -= fabsf(protected_dist_sum / protected_count) * env->avoid_factor;
-            current_boid_reward -= flclip(protected_count/5.0, 0.0f, 1.0f) * env->avoid_factor;
+            //current_boid_reward -= fabsf(protected_dist_sum / protected_count) * env->seperation_factor;
+            current_boid_reward -= flclip(protected_count/5.0, 0.0f, 1.0f) * env->seperation_factor;
         }
         if (visual_count) {
             vis_x_avg  = vis_x_sum  / visual_count;
@@ -170,10 +170,10 @@ void c_step(Boids *env) {
             vis_vx_avg = vis_vx_sum / visual_count;
             vis_vy_avg = vis_vy_sum / visual_count;
 
-            current_boid_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->matching_factor;
-            current_boid_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->matching_factor;
-            current_boid_reward -= fabsf(vis_x_avg  - current_boid->x) * env->centering_factor;
-            current_boid_reward -= fabsf(vis_y_avg  - current_boid->y) * env->centering_factor;
+            current_boid_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
+            current_boid_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
+            current_boid_reward -= fabsf(vis_x_avg  - current_boid->x) * env->cohesion_factor;
+            current_boid_reward -= fabsf(vis_y_avg  - current_boid->y) * env->cohesion_factor;
         }
         if (current_boid->y < TOP_MARGIN || current_boid->y > HEIGHT - BOTTOM_MARGIN) {
             current_boid_reward -= env->margin_turn_factor;
diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py
new file mode 100644
index 0000000000..b386449b99
--- /dev/null
+++ b/pufferlib/ocean/boids/boids.py
@@ -0,0 +1,126 @@
+'''
+High-perf Boids
+Inspired by https://people.ece.cornell.edu/land/courses/ece4760/labs/s2021/Boids/Boids.html
+'''
+
+import numpy as np
+import gymnasium
+
+import pufferlib
+from pufferlib.ocean.boids import binding
+
+class Boids(pufferlib.PufferEnv):
+    def __init__(
+        self,
+        num_envs=1,
+        buf=None,
+        render_mode=None,
+        seed=0,
+        report_interval=1,
+        num_boids=1,
+        margin_turn_factor=1.0,
+        cohesion_factor=0.0,
+        seperation_factor=0.0,
+        alignment_factor=0.0
+    ):
+        ACTION_SPACE_SIZE = 2
+        self.num_agents = num_envs * num_boids
+        self.num_boids = num_boids
+
+        self.single_observation_space = gymnasium.spaces.Box(
+            -1000.0, 1000.0, shape=(num_boids*4,), dtype=np.float32
+        )
+        
+        #self.single_action_space = gymnasium.spaces.Box(
+        #    -np.inf, np.inf, shape=(ACTION_SPACE_SIZE,), dtype=np.float32
+        #)
+        self.single_action_space = gymnasium.spaces.MultiDiscrete([5, 5])
+
+        self.render_mode = render_mode
+        self.report_interval = report_interval
+
+        super().__init__(buf)
+        self.actions = self.actions.astype(np.float32)
+
+        # Create C binding with flattened action buffer
+        # We need to manually create a flattened action buffer to pass to C
+        #self.flat_actions = np.zeros((self.num_agents * ACTION_SPACE_SIZE), dtype=np.float32)
+        
+        c_envs = []
+        for env_num in range(num_envs):
+            c_envs.append(binding.env_init(
+                self.observations[env_num*num_boids:(env_num+1)*num_boids],
+                #self.flat_actions[env_num*num_boids*ACTION_SPACE_SIZE:(env_num+1)*num_boids*ACTION_SPACE_SIZE],
+                self.actions[env_num*num_boids:(env_num+1)*num_boids],
+                self.rewards[env_num*num_boids:(env_num+1)*num_boids],
+                self.terminals[env_num*num_boids:(env_num+1)*num_boids],
+                self.truncations[env_num*num_boids:(env_num+1)*num_boids],
+                seed,
+                num_boids=num_boids,
+                report_interval=self.report_interval,
+                margin_turn_factor=margin_turn_factor,
+                cohesion_factor=cohesion_factor,
+                seperation_factor=seperation_factor,
+                alignment_factor=alignment_factor,
+            ))
+        
+        self.c_envs = binding.vectorize(*c_envs)
+
+    def reset(self, seed=0):
+        self.tick = 0
+        binding.vec_reset(self.c_envs, seed)
+        return self.observations, []
+
+    def step(self, actions):
+        # Clip actions to valid range
+        clipped_actions = (actions.astype(np.float32) - 2.0) / 4.0
+        #clipped_actions = np.clip(actions, -1.0, 1.0)
+        
+        # Copy the clipped actions to our flat actions buffer for C binding
+        # Flatten from [num_agents, num_boids, 2] to a 1D array for C
+        # TODO: Check if I even need this? its not like I'm using the actions anywhere else
+        #self.flat_actions[:] = clipped_actions.reshape(-1)
+        
+        # Save the original actions for the experience buffer
+        # TODO: Same thing with this
+        self.actions[:] = clipped_actions
+        
+        self.tick += 1
+        binding.vec_step(self.c_envs)
+
+        info = []
+        if self.tick % self.report_interval == 0:
+            log_data = binding.vec_log(self.c_envs)
+            if log_data:
+                info.append(log_data)
+
+        # print(f"OBSERVATIONS: {self.observations}")
+        return (self.observations, self.rewards,
+            self.terminals, self.truncations, info)
+
+    def render(self):
+        binding.vec_render(self.c_envs, 0)
+
+    def close(self):
+        binding.vec_close(self.c_envs)
+
+def test_performance(timeout=10, atn_cache=1024):
+    env = Boids(num_envs=1000)
+    env.reset()
+    tick = 0
+
+    # Generate random actions with proper shape: [cache_size, num_agents, action_dim]
+    actions = np.random.uniform(-3.0, 3.0, (atn_cache, env.num_agents, 2))
+
+    import time
+    start = time.time()
+    while time.time() - start < timeout:
+        atn = actions[tick % atn_cache]
+        env.step(atn)
+        tick += 1
+
+    print(f'SPS: {env.num_agents * tick / (time.time() - start)}')
+
+
+if __name__ == '__main__':
+    test_performance()

From fcc859afe6aa60e10cff65be3db9ce688f059908 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 24 May 2025 13:35:15 -0400
Subject: [PATCH 02/82] remove unused log fields

---
 ocean/boids/binding.c |  3 ---
 ocean/boids/boids.h   | 14 +++-----------
 2 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/ocean/boids/binding.c b/ocean/boids/binding.c
index 27b40defb4..89cb389759 100644
--- a/ocean/boids/binding.c
+++ b/ocean/boids/binding.c
@@ -15,10 +15,7 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
 }
 
 static int my_log(PyObject* dict, Log* log) {
-    assign_to_dict(dict, "perf", log->perf);
     assign_to_dict(dict, "score", log->score);
-    assign_to_dict(dict, "episode_return", log->episode_return);
-    assign_to_dict(dict, "episode_length", log->episode_length);
     assign_to_dict(dict, "n", log->n);
     return 0;
 }
diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 5828b57169..1feb6be255 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -22,10 +22,7 @@
 #define BOID_TEXTURE_PATH "./resources/puffers_128.png"
 
 typedef struct {
-    float perf;
     float score;
-    float episode_return;
-    float episode_length;
     float n;
 } Log;
 
@@ -60,7 +57,6 @@ typedef struct {
     Log* boid_logs;
     unsigned report_interval;
     Client* client;
-
 } Boids;
 
 static inline float flmax(float a, float b) { return a > b ? a : b; }
@@ -191,13 +187,9 @@ void c_step(Boids *env) {
         env->rewards[current_indx] = current_boid_reward / 2.0f;
 
         //log updates
-        if (env->tick == env->report_interval) {
-            env->log.score          += env->rewards[current_indx];
-            env->log.n              += 1.0f;
-
-            /* clear per-boid log for next episode */
-            // env->boid_logs[boid_indx] = (Log){0};
-            env->tick = 0;
+        if (env->tick % env->report_interval == 0) {
+            env->log.score += env->rewards[current_indx];
+            env->log.n += 1.0f;
         }
     }
     //env->log.score /= env->num_boids;

From 568e620bcd59998c16b60548ce51f6e0d608ac78 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 24 May 2025 13:35:29 -0400
Subject: [PATCH 03/82] remove unused variable

---
 ocean/boids/boids.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 1feb6be255..cc8bfeced7 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -88,8 +88,6 @@ void init(Boids *env) {
 
 
 static void compute_observations(Boids *env) {
-    unsigned base_indx;
-
     int idx = 0;
     for (unsigned i=0; i<env->num_boids; i++) {
         for (unsigned j=0; j<env->num_boids; j++) {

From d1e457e1e8422ea1e6169187f4057d5e270999a6 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 24 May 2025 16:08:29 -0400
Subject: [PATCH 04/82] remove unused commented code

---
 pufferlib/ocean/boids/boids.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py
index b386449b99..96784b7bd9 100644
--- a/pufferlib/ocean/boids/boids.py
+++ b/pufferlib/ocean/boids/boids.py
@@ -5,7 +5,6 @@
 
 import numpy as np
 import gymnasium
-
 import pufferlib
 from pufferlib.ocean.boids import binding
 
@@ -74,12 +73,6 @@ def reset(self, seed=0):
     def step(self, actions):
         # Clip actions to valid range
         clipped_actions = (actions.astype(np.float32) - 2.0) / 4.0
-        #clipped_actions = np.clip(actions, -1.0, 1.0)
-        
-        # Copy the clipped actions to our flat actions buffer for C binding
-        # Flatten from [num_agents, num_boids, 2] to a 1D array for C
-        # TODO: Check if I even need this? its not like I'm using the actions anywhere else
-        #self.flat_actions[:] = clipped_actions.reshape(-1)
         
         # Save the original actions for the experience buffer
         # TODO: Same thing with this
@@ -94,7 +87,6 @@ def step(self, actions):
             if log_data:
                 info.append(log_data)
 
-        # print(f"OBSERVATIONS: {self.observations}")
         return (self.observations, self.rewards,
             self.terminals, self.truncations, info)
 

From 31ba846e76b37f28ae31e737f935541594c74c71 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 28 May 2025 23:33:31 -0400
Subject: [PATCH 05/82] remove unused boid_logs and fix logs calculation

---
 ocean/boids/boids.h | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index cc8bfeced7..f815036114 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -54,7 +54,6 @@ typedef struct {
     float alignment_factor;
     unsigned tick;
     Log log;
-    Log* boid_logs;
     unsigned report_interval;
     Client* client;
 } Boids;
@@ -69,12 +68,10 @@ static void respawn_boid(Boids *env, unsigned int i) {
     env->boids[i].y = rndf(BOTTOM_MARGIN, HEIGHT - TOP_MARGIN);
     env->boids[i].velocity.x = 0;
     env->boids[i].velocity.y = 0;
-    env->boid_logs[i]       = (Log){0};
 }
 
 void init(Boids *env) {
     env->boids = (Boid*)calloc(env->num_boids, sizeof(Boid));
-    env->boid_logs = (Log*)calloc(env->num_boids, sizeof(Log));
     env->log = (Log){0};
     env->tick = 0;
 
@@ -117,10 +114,12 @@ void c_step(Boids *env) {
     bool manual_control = IsKeyDown(KEY_LEFT_SHIFT);
     float mouse_x = (float)GetMouseX();
     float mouse_y = (float)GetMouseY();
+    float avg_reward = 0.0f;
 
     env->tick++;
     env->rewards[0] = 0;
     env->log.score = 0;
+    env->log.n = 0;
     for (unsigned current_indx = 0; current_indx < env->num_boids; current_indx++) {
         // apply action
         current_boid = &env->boids[current_indx];
@@ -183,14 +182,14 @@ void c_step(Boids *env) {
         // env->rewards[current_indx] = current_boid_reward / 15.0f;
         // printf("current_boid_reward: %f\n", current_boid_reward);
         env->rewards[current_indx] = current_boid_reward / 2.0f;
-
-        //log updates
-        if (env->tick % env->report_interval == 0) {
-            env->log.score += env->rewards[current_indx];
-            env->log.n += 1.0f;
-        }
+        avg_reward += env->rewards[current_indx];
+    }
+    //log updates
+    avg_reward /= env->num_boids;
+    if (env->tick % env->report_interval == 0) {
+        env->log.score = avg_reward;
+        env->log.n = 1;
     }
-    //env->log.score /= env->num_boids;
 
     compute_observations(env);
 }
@@ -210,7 +209,6 @@ void c_close_client(Client* client) {
 
 void c_close(Boids* env) {
     free(env->boids);
-    free(env->boid_logs);
     if (env->client != NULL) {
         c_close_client(env->client);
     }

From 374e1671d9bdd0a75740398acfadaa1b6ded4f89 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 31 May 2025 16:08:37 -0400
Subject: [PATCH 06/82] fix overflow and zero report_interval

---
 ocean/boids/boids.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ocean/boids/boids.c b/ocean/boids/boids.c
index 735f8e8c9e..14de0d5e5a 100644
--- a/ocean/boids/boids.c
+++ b/ocean/boids/boids.c
@@ -7,6 +7,7 @@
 
 // --- Demo Configuration ---
 #define NUM_BOIDS_DEMO 20   // Number of boids for the standalone demo
+#define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo
 #define MAX_STEPS_DEMO 500 // Max steps per episode in the demo
 #define ACTION_SCALE 3.0f   // Corresponds to action space [-3.0, 3.0]
 
@@ -27,11 +28,12 @@ void demo() {
     // Initialize Boids environment struct
     Boids env = {0}; 
     env.num_boids = NUM_BOIDS_DEMO;
+    env.report_interval = REPORT_INTERVAL_DEMO;
     
     // In the Python binding, these pointers are assigned from NumPy arrays.
     // Here, we need to allocate them explicitly.
-    size_t obs_size = env.num_boids * 4; // num_boids * (x, y, vx, vy)
-    size_t act_size = env.num_boids * 2; // num_boids * (dvx, dvy)
+    size_t obs_size = env.num_boids * env.num_boids * 4; // the 4 = (x, y, vx, vy)
+    size_t act_size = env.num_boids * 2; // the 2 = (dvx, dvy)
     env.observations = (float*)calloc(obs_size, sizeof(float));
     env.actions = (float*)calloc(act_size, sizeof(float));
     env.rewards = (float*)calloc(env.num_boids, sizeof(float)); // Env-level reward

From c5b5dd6b28b060ccf81b339ff10ccfd52a4108c0 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 31 May 2025 16:32:35 -0400
Subject: [PATCH 07/82] add above zero checks for num_boids and report_interval

---
 ocean/boids/boids.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index f815036114..c26ffa56e7 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -71,6 +71,14 @@ static void respawn_boid(Boids *env, unsigned int i) {
 }
 
 void init(Boids *env) {
+    if(env->num_boids < 1) {
+        printf("ERROR: num_boids must be bigger than 0\n");
+        exit(1);
+    }
+    if (env->report_interval < 1) {
+        printf("ERROR: report_interval must be bigger than 0\n");
+        exit(1);
+    }
     env->boids = (Boid*)calloc(env->num_boids, sizeof(Boid));
     env->log = (Log){0};
     env->tick = 0;

From 2f0e99b4f8b0b17bf0013a6ec1a47007342e6998 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Mon, 2 Jun 2025 19:38:43 -0400
Subject: [PATCH 08/82] remove unused commented flat_actions

---
 pufferlib/ocean/boids/boids.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py
index 96784b7bd9..a597b46ca3 100644
--- a/pufferlib/ocean/boids/boids.py
+++ b/pufferlib/ocean/boids/boids.py
@@ -49,7 +49,6 @@ def __init__(
         for env_num in range(num_envs):
             c_envs.append(binding.env_init(
                 self.observations[env_num*num_boids:(env_num+1)*num_boids],
-                #self.flat_actions[env_num*num_boids*ACTION_SPACE_SIZE:(env_num+1)*num_boids*ACTION_SPACE_SIZE],
                 self.actions[env_num*num_boids:(env_num+1)*num_boids],
                 self.rewards[env_num*num_boids:(env_num+1)*num_boids],
                 self.terminals[env_num*num_boids:(env_num+1)*num_boids],

From c93ad08f6e6db91ff0f723184de8705ed5714fc3 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Mon, 2 Jun 2025 19:50:18 -0400
Subject: [PATCH 09/82] simplify separation reward and test it

---
 config/boids.ini    |  8 ++++----
 ocean/boids/boids.h | 30 +++++++++++++++++-------------
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index 0e6c8d33e6..691ef6c2a4 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -11,10 +11,10 @@ num_envs = 64
 num_boids = 64
 ; num_envs = 1
 ; num_boids = 1
-margin_turn_factor = 1.0
-cohesion_factor = 0.0
-seperation_factor = 0.0
-alignment_factor = 0.0
+margin_turn_factor = 0.0
+centering_factor = 0.00
+avoid_factor = 1.00
+matching_factor = 1.00
 
 [vec]
 num_workers = 2
diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index c26ffa56e7..1eb161532d 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -117,7 +117,7 @@ void c_step(Boids *env) {
     Boid* current_boid;
     Boid observed_boid;
     float vis_vx_sum, vis_vy_sum, vis_x_sum, vis_y_sum, vis_x_avg, vis_y_avg, vis_vx_avg, vis_vy_avg;
-    float diff_x, diff_y, dist, protected_dist_sum, current_boid_reward;
+    float diff_x, diff_y, dist, current_boid_reward;
     unsigned visual_count, protected_count;
     bool manual_control = IsKeyDown(KEY_LEFT_SHIFT);
     float mouse_x = (float)GetMouseX();
@@ -142,7 +142,7 @@ void c_step(Boids *env) {
         current_boid->y = flclip(current_boid->y + current_boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);
 
         // reward calculation
-        current_boid_reward = 0.0f, protected_dist_sum = 0.0f, protected_count = 0.0f;
+        current_boid_reward = 0.0f, protected_count = 0.0f;
         visual_count = 0.0f, vis_vx_sum = 0.0f, vis_vy_sum = 0.0f, vis_x_sum = 0.0f, vis_y_sum = 0.0f;
         for (unsigned observed_indx = 0; observed_indx < env->num_boids; observed_indx++) {
             if (current_indx == observed_indx) continue;
@@ -151,7 +151,6 @@ void c_step(Boids *env) {
             diff_y = current_boid->y - observed_boid.y;
             dist = sqrtf(diff_x*diff_x + diff_y*diff_y);
             if (dist < PROTECTED_RANGE) {
-                protected_dist_sum += (PROTECTED_RANGE - dist);
                 protected_count++;
             } else if (dist < VISUAL_RANGE) {
                 vis_x_sum += observed_boid.x;
@@ -162,8 +161,7 @@ void c_step(Boids *env) {
             }
         }
         if (protected_count > 0) {
-            //current_boid_reward -= fabsf(protected_dist_sum / protected_count) * env->seperation_factor;
-            current_boid_reward -= flclip(protected_count/5.0, 0.0f, 1.0f) * env->seperation_factor;
+            current_boid_reward -= flclip(protected_count/env->num_boids * env->avoid_factor, 0.0f, 1.0f);
         }
         if (visual_count) {
             vis_x_avg  = vis_x_sum  / visual_count;
@@ -189,15 +187,21 @@ void c_step(Boids *env) {
         // Normalization
         // env->rewards[current_indx] = current_boid_reward / 15.0f;
         // printf("current_boid_reward: %f\n", current_boid_reward);
-        env->rewards[current_indx] = current_boid_reward / 2.0f;
-        avg_reward += env->rewards[current_indx];
-    }
-    //log updates
-    avg_reward /= env->num_boids;
-    if (env->tick % env->report_interval == 0) {
-        env->log.score = avg_reward;
-        env->log.n = 1;
+        // env->rewards[current_indx] = current_boid_reward / 2.0f;
+        env->rewards[current_indx] = current_boid_reward;
+
+
+        //log updates
+        if (env->tick == env->report_interval) {
+            env->log.score          += env->rewards[current_indx];
+            env->log.n              += 1.0f;
+
+            /* clear per-boid log for next episode */
+            // env->boid_logs[boid_indx] = (Log){0};
+            env->tick = 0;
+        }
     }
+    //env->log.score /= env->num_boids;
 
     compute_observations(env);
 }

From 232dfb6899fbd5711e0ca5381b8426c73cde1e88 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Mon, 2 Jun 2025 19:51:30 -0400
Subject: [PATCH 10/82] test out only avoid factor

---
 config/boids.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/boids.ini b/config/boids.ini
index 691ef6c2a4..c9be5ee885 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -14,7 +14,7 @@ num_boids = 64
 margin_turn_factor = 0.0
 centering_factor = 0.00
 avoid_factor = 1.00
-matching_factor = 1.00
+matching_factor = 0.00
 
 [vec]
 num_workers = 2

From ba3a091fd2ca3dc8ee914c495f191e7c0ca7bb5e Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Tue, 3 Jun 2025 16:07:11 -0400
Subject: [PATCH 11/82] remove unused avg_reward and change separation factor
 reward

---
 ocean/boids/boids.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 1eb161532d..f5c9037e62 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -122,7 +122,6 @@ void c_step(Boids *env) {
     bool manual_control = IsKeyDown(KEY_LEFT_SHIFT);
     float mouse_x = (float)GetMouseX();
     float mouse_y = (float)GetMouseY();
-    float avg_reward = 0.0f;
 
     env->tick++;
     env->rewards[0] = 0;
@@ -161,7 +160,7 @@ void c_step(Boids *env) {
             }
         }
         if (protected_count > 0) {
-            current_boid_reward -= flclip(protected_count/env->num_boids * env->avoid_factor, 0.0f, 1.0f);
+            current_boid_reward -= (float)((env->num_boids - protected_count) - protected_count) / env->num_boids * env->seperation_factor;
         }
         if (visual_count) {
             vis_x_avg  = vis_x_sum  / visual_count;

From 564e7182dcdcbfc609c65a08b6435164b3edb365 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 4 Jun 2025 14:17:11 -0400
Subject: [PATCH 12/82] fix factor names

---
 config/boids.ini | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index c9be5ee885..0edba1bff0 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -12,9 +12,9 @@ num_boids = 64
 ; num_envs = 1
 ; num_boids = 1
 margin_turn_factor = 0.0
-centering_factor = 0.00
-avoid_factor = 1.00
-matching_factor = 0.00
+cohesion_factor = 0.00
+seperation_factor = 1.00
+alignment_factor = 0.00
 
 [vec]
 num_workers = 2

From 45aa2a270eac0f361d91e521b2591747a85574c9 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 4 Jun 2025 15:06:38 -0400
Subject: [PATCH 13/82] remove unused commented code

---
 ocean/boids/boids.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index f5c9037e62..32ead6670c 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -194,13 +194,9 @@ void c_step(Boids *env) {
         if (env->tick == env->report_interval) {
             env->log.score          += env->rewards[current_indx];
             env->log.n              += 1.0f;
-
-            /* clear per-boid log for next episode */
-            // env->boid_logs[boid_indx] = (Log){0};
             env->tick = 0;
         }
     }
-    //env->log.score /= env->num_boids;
 
     compute_observations(env);
 }

From 6fbd13c2fd4d2e3d1186136a2239aaa82c040ab7 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 5 Jun 2025 02:29:46 -0400
Subject: [PATCH 14/82] fix separation factor reward calculation

---
 ocean/boids/boids.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 32ead6670c..476b461e7e 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -117,7 +117,7 @@ void c_step(Boids *env) {
     Boid* current_boid;
     Boid observed_boid;
     float vis_vx_sum, vis_vy_sum, vis_x_sum, vis_y_sum, vis_x_avg, vis_y_avg, vis_vx_avg, vis_vy_avg;
-    float diff_x, diff_y, dist, current_boid_reward;
+    float diff_x, diff_y, dist, current_boid_reward, protected_range_diff;
     unsigned visual_count, protected_count;
     bool manual_control = IsKeyDown(KEY_LEFT_SHIFT);
     float mouse_x = (float)GetMouseX();
@@ -160,7 +160,8 @@ void c_step(Boids *env) {
             }
         }
         if (protected_count > 0) {
-            current_boid_reward -= (float)((env->num_boids - protected_count) - protected_count) / env->num_boids * env->seperation_factor;
+            protected_range_diff = (float)(env->num_boids - protected_count) - protected_count;
+            current_boid_reward += protected_range_diff / env->num_boids * env->seperation_factor;
         }
         if (visual_count) {
             vis_x_avg  = vis_x_sum  / visual_count;

From d33c68805b207d7ea15e5d8010a51cbf071d4f41 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 5 Jun 2025 03:02:49 -0400
Subject: [PATCH 15/82] remove unused commented params

---
 config/boids.ini | 47 +----------------------------------------------
 1 file changed, 1 insertion(+), 46 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index 0edba1bff0..0b3fe5a6d0 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -25,49 +25,4 @@ batch_size = auto
 total_timesteps = 100_000_000
 gamma = 0.95
 learning_rate = 0.025
-minibatch_size = 16384
-; minibatch_size = 1
-
-; [sweep]
-; method = protein
-; metric = episode_length
-
-; [sweep.train.total_timesteps]
-; distribution = log_normal
-; min = 1e6
-; max = 1e7
-; mean = 5e6
-; scale = 0.5
-
-; [sweep.train.gamma]
-; distribution = log_normal
-; min = 0.9
-; max = 0.999
-; mean = 0.97
-
-; [sweep.train.gae_lambda]
-; distribution = log_normal
-; min = 0.7
-; max = 0.999
-; mean = 0.95
-
-; [sweep.train.learning_rate]
-; distribution = log_normal
-; min = 0.0001
-; max = 0.001
-; mean = 0.00025
-; scale = 0.5
-
-; [sweep.train.batch_size]
-; min = 32768
-; max = 131072
-; mean = 65536
-; scale = 0.5
-
-; [sweep.train.minibatch_size]
-; min = 512
-; max = 2048
-; mean = 1024
-; scale = 0.5
-
-
+minibatch_size = 16384
\ No newline at end of file

From d3ca11d0dbaa2f79448798a5e18cd48597a72103 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 5 Jun 2025 12:58:48 -0400
Subject: [PATCH 16/82] remove normalization from separation factor calculation

---
 ocean/boids/boids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 476b461e7e..0b0db55b0e 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -161,7 +161,7 @@ void c_step(Boids *env) {
         }
         if (protected_count > 0) {
             protected_range_diff = (float)(env->num_boids - protected_count) - protected_count;
-            current_boid_reward += protected_range_diff / env->num_boids * env->seperation_factor;
+            current_boid_reward += protected_range_diff * env->seperation_factor;
         }
         if (visual_count) {
             vis_x_avg  = vis_x_sum  / visual_count;

From cb311570a0c6e5644bfb464f257375b8c3415ba0 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 5 Jun 2025 14:21:18 -0400
Subject: [PATCH 17/82] fix visual range

---
 ocean/boids/boids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 0b0db55b0e..77a98f204f 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -13,7 +13,7 @@
 #define LEFT_MARGIN 50
 #define RIGHT_MARGIN 50
 #define VELOCITY_CAP 5
-#define VISUAL_RANGE 20
+#define VISUAL_RANGE 400
 #define PROTECTED_RANGE 100
 #define WIDTH 1080
 #define HEIGHT 720

From 6fdeba6c18e0e4911de11d0684d6bb0c3cdbb182 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 5 Jun 2025 20:24:34 -0400
Subject: [PATCH 18/82] remove positive margin rewards and remove commented code

---
 ocean/boids/boids.h | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 77a98f204f..744440cf90 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -10,8 +10,8 @@
 
 #define TOP_MARGIN 50
 #define BOTTOM_MARGIN 50
-#define LEFT_MARGIN 50
-#define RIGHT_MARGIN 50
+#define LEFT_MARGIN 100
+#define RIGHT_MARGIN 100
 #define VELOCITY_CAP 5
 #define VISUAL_RANGE 400
 #define PROTECTED_RANGE 100
@@ -176,20 +176,13 @@ void c_step(Boids *env) {
         }
         if (current_boid->y < TOP_MARGIN || current_boid->y > HEIGHT - BOTTOM_MARGIN) {
             current_boid_reward -= env->margin_turn_factor;
-        } else {
-            current_boid_reward += env->margin_turn_factor;
         }
         if (current_boid->x < LEFT_MARGIN || current_boid->x > WIDTH  - RIGHT_MARGIN) {
             current_boid_reward -= env->margin_turn_factor;
-        } else {
-            current_boid_reward += env->margin_turn_factor;
         }
-        // Normalization
-        // env->rewards[current_indx] = current_boid_reward / 15.0f;
-        // printf("current_boid_reward: %f\n", current_boid_reward);
-        // env->rewards[current_indx] = current_boid_reward / 2.0f;
-        env->rewards[current_indx] = current_boid_reward;
 
+        // Normalization
+        env->rewards[current_indx] = current_boid_reward / 4.0f;
 
         //log updates
         if (env->tick == env->report_interval) {

From 02897518491be4596cd0e6d26d5e0a2fd7f88ae9 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 5 Jun 2025 20:33:52 -0400
Subject: [PATCH 19/82] add factors to env run with "boids.c"

---
 ocean/boids/boids.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/ocean/boids/boids.c b/ocean/boids/boids.c
index 14de0d5e5a..aca6d35c68 100644
--- a/ocean/boids/boids.c
+++ b/ocean/boids/boids.c
@@ -10,6 +10,10 @@
 #define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo
 #define MAX_STEPS_DEMO 500 // Max steps per episode in the demo
 #define ACTION_SCALE 3.0f   // Corresponds to action space [-3.0, 3.0]
+#define MARGIN_TURN_FACTOR 1.0
+#define COHESION_FACTOR 0.0
+#define SEPERATION_FACTOR 0.0
+#define ALIGNMENT_FACTOR 0.0
 
 // Dummy action generation: random velocity changes for each boid
 void generate_dummy_actions(Boids* env) {
@@ -29,6 +33,10 @@ void demo() {
     Boids env = {0}; 
     env.num_boids = NUM_BOIDS_DEMO;
     env.report_interval = REPORT_INTERVAL_DEMO;
+    env.margin_turn_factor = MARGIN_TURN_FACTOR;
+    env.cohesion_factor = COHESION_FACTOR;
+    env.seperation_factor = SEPERATION_FACTOR;
+    env.alignment_factor = ALIGNMENT_FACTOR;
     
     // In the Python binding, these pointers are assigned from NumPy arrays.
     // Here, we need to allocate them explicitly.

From 0a18e2eda5bc2f7310ed095763ada7ffb75a10fe Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 5 Jun 2025 20:44:00 -0400
Subject: [PATCH 20/82] add debug margin lines and adjust reward normalization

---
 ocean/boids/boids.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 744440cf90..7caef00d68 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -182,7 +182,7 @@ void c_step(Boids *env) {
         }
 
         // Normalization
-        env->rewards[current_indx] = current_boid_reward / 4.0f;
+        env->rewards[current_indx] = current_boid_reward / 2.0f;
 
         //log updates
         if (env->tick == env->report_interval) {
@@ -257,6 +257,11 @@ void c_render(Boids* env) {
         BeginDrawing();
         ClearBackground((Color){6, 24, 24, 255});
 
+        DrawLine(LEFT_MARGIN, 0, LEFT_MARGIN, HEIGHT, RED);
+        DrawLine(WIDTH - RIGHT_MARGIN, 0, WIDTH - RIGHT_MARGIN, HEIGHT, RED);
+        DrawLine(0, TOP_MARGIN, WIDTH, TOP_MARGIN, RED);
+        DrawLine(0, HEIGHT - BOTTOM_MARGIN, WIDTH, HEIGHT - BOTTOM_MARGIN, RED);
+
         for (unsigned boid_indx = 0; boid_indx < env->num_boids; boid_indx++) {
             DrawTexturePro(
                 env->client->boid_texture,

From 8c69a45f323a38d904e9b5c8a6edfb55b06af291 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 5 Jun 2025 20:45:09 -0400
Subject: [PATCH 21/82] only turn on margin turn factor and adjust total
 timesteps

---
 config/boids.ini | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index 0b3fe5a6d0..b3732b5126 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -11,10 +11,10 @@ num_envs = 64
 num_boids = 64
 ; num_envs = 1
 ; num_boids = 1
-margin_turn_factor = 0.0
-cohesion_factor = 0.00
-seperation_factor = 1.00
-alignment_factor = 0.00
+margin_turn_factor = 1.0
+cohesion_factor = 0.0
+seperation_factor = 0.0
+alignment_factor = 0.0
 
 [vec]
 num_workers = 2
@@ -22,7 +22,8 @@ num_envs = 2
 batch_size = auto
 
 [train]
-total_timesteps = 100_000_000
+total_timesteps = 150_000_000
+; total_timesteps = 15_000_000
 gamma = 0.95
 learning_rate = 0.025
 minibatch_size = 16384
\ No newline at end of file

From 2fc5c9ed9e374b2f8556d3792904d87f21064fca Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 5 Jun 2025 21:35:23 -0400
Subject: [PATCH 22/82] change top/bottom margins

---
 ocean/boids/boids.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 7caef00d68..aef336cd1a 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -8,8 +8,8 @@
 
 #include "raylib.h"
 
-#define TOP_MARGIN 50
-#define BOTTOM_MARGIN 50
+#define TOP_MARGIN 100
+#define BOTTOM_MARGIN 100
 #define LEFT_MARGIN 100
 #define RIGHT_MARGIN 100
 #define VELOCITY_CAP 5

From 509fed7ec96617138770add04c52981665b0677e Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 5 Jun 2025 21:39:33 -0400
Subject: [PATCH 23/82] account for boid width and height in margin reward
 calculation

---
 ocean/boids/boids.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index aef336cd1a..54242bd65e 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -174,10 +174,10 @@ void c_step(Boids *env) {
             current_boid_reward -= fabsf(vis_x_avg  - current_boid->x) * env->cohesion_factor;
             current_boid_reward -= fabsf(vis_y_avg  - current_boid->y) * env->cohesion_factor;
         }
-        if (current_boid->y < TOP_MARGIN || current_boid->y > HEIGHT - BOTTOM_MARGIN) {
+        if (current_boid->y < TOP_MARGIN || current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN) {
             current_boid_reward -= env->margin_turn_factor;
         }
-        if (current_boid->x < LEFT_MARGIN || current_boid->x > WIDTH  - RIGHT_MARGIN) {
+        if (current_boid->x < LEFT_MARGIN || current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN) {
             current_boid_reward -= env->margin_turn_factor;
         }
 

From 7a12bb71c68a6bc37e982b37f4fce57bc2afe4f8 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 11 Jun 2025 00:11:20 -0400
Subject: [PATCH 24/82] increase max steps

---
 ocean/boids/boids.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocean/boids/boids.c b/ocean/boids/boids.c
index aca6d35c68..8718e70065 100644
--- a/ocean/boids/boids.c
+++ b/ocean/boids/boids.c
@@ -8,7 +8,7 @@
 // --- Demo Configuration ---
 #define NUM_BOIDS_DEMO 20   // Number of boids for the standalone demo
 #define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo
-#define MAX_STEPS_DEMO 500 // Max steps per episode in the demo
+#define MAX_STEPS_DEMO 10000 // Max steps per episode in the demo
 #define ACTION_SCALE 3.0f   // Corresponds to action space [-3.0, 3.0]
 #define MARGIN_TURN_FACTOR 1.0
 #define COHESION_FACTOR 0.0

From 6dbbc749851a4f18f9e3846b1156ad071a6f7a7c Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 11 Jun 2025 20:17:24 -0400
Subject: [PATCH 25/82] remove debug margin lines

---
 ocean/boids/boids.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 54242bd65e..6b0311d376 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -257,11 +257,6 @@ void c_render(Boids* env) {
         BeginDrawing();
         ClearBackground((Color){6, 24, 24, 255});
 
-        DrawLine(LEFT_MARGIN, 0, LEFT_MARGIN, HEIGHT, RED);
-        DrawLine(WIDTH - RIGHT_MARGIN, 0, WIDTH - RIGHT_MARGIN, HEIGHT, RED);
-        DrawLine(0, TOP_MARGIN, WIDTH, TOP_MARGIN, RED);
-        DrawLine(0, HEIGHT - BOTTOM_MARGIN, WIDTH, HEIGHT - BOTTOM_MARGIN, RED);
-
         for (unsigned boid_indx = 0; boid_indx < env->num_boids; boid_indx++) {
             DrawTexturePro(
                 env->client->boid_texture,

From e7db00e1687e1f6a794816c7303f7efd2b66f641 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 11 Jun 2025 20:18:27 -0400
Subject: [PATCH 26/82] fix observations for margin factor

---
 ocean/boids/boids.h            | 20 ++++++++++++++++----
 pufferlib/ocean/boids/boids.py |  2 +-
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 6b0311d376..b883d72246 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -95,11 +95,23 @@ void init(Boids *env) {
 static void compute_observations(Boids *env) {
     int idx = 0;
     for (unsigned i=0; i<env->num_boids; i++) {
+        env->observations[idx++] = env->boids[i].x / WIDTH;
+        env->observations[idx++] = env->boids[i].y / HEIGHT;
+        env->observations[idx++] = env->boids[i].velocity.x / VELOCITY_CAP;
+        env->observations[idx++] = env->boids[i].velocity.y / VELOCITY_CAP;
+        for (unsigned j=0; j<4; j++) {
+            env->observations[idx++] = 0;
+        }
         for (unsigned j=0; j<env->num_boids; j++) {
-            env->observations[idx++] = (env->boids[j].x - env->boids[i].x) / WIDTH;
-            env->observations[idx++] = (env->boids[j].y - env->boids[i].y) / HEIGHT;
-            env->observations[idx++] = (env->boids[j].velocity.x - env->boids[i].velocity.x) / VELOCITY_CAP;
-            env->observations[idx++] = (env->boids[j].velocity.y - env->boids[i].velocity.y) / VELOCITY_CAP;
+            if (i == j) continue;
+            env->observations[idx++] = env->boids[j].x / WIDTH;
+            env->observations[idx++] = env->boids[j].y / HEIGHT;
+            env->observations[idx++] = env->boids[j].velocity.x / VELOCITY_CAP;
+            env->observations[idx++] = env->boids[j].velocity.y / VELOCITY_CAP;
+            env->observations[idx++] = (env->boids[i].x - env->boids[j].x) / WIDTH;
+            env->observations[idx++] = (env->boids[i].y - env->boids[j].y) / HEIGHT;
+            env->observations[idx++] = (env->boids[i].velocity.x - env->boids[j].velocity.x) / VELOCITY_CAP;
+            env->observations[idx++] = (env->boids[i].velocity.y - env->boids[j].velocity.y) / VELOCITY_CAP;
         }
     }
 }
diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py
index a597b46ca3..5582cec81f 100644
--- a/pufferlib/ocean/boids/boids.py
+++ b/pufferlib/ocean/boids/boids.py
@@ -27,7 +27,7 @@ def __init__(
         self.num_boids = num_boids
 
         self.single_observation_space = gymnasium.spaces.Box(
-            -1000.0, 1000.0, shape=(num_boids*4,), dtype=np.float32
+            -1000.0, 1000.0, shape=(num_boids*8,), dtype=np.float32
         )
         
         #self.single_action_space = gymnasium.spaces.Box(

From 7a490205aed07e2a8553ba695719f2e2d14cd14a Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 11 Jun 2025 20:50:23 -0400
Subject: [PATCH 27/82] remove single agent params

---
 config/boids.ini | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index b3732b5126..ca757d9cf1 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -9,11 +9,9 @@ rnn_name = Recurrent
 [env]
 num_envs = 64
 num_boids = 64
-; num_envs = 1
-; num_boids = 1
-margin_turn_factor = 1.0
+margin_turn_factor = 0.0
 cohesion_factor = 0.0
-seperation_factor = 0.0
+seperation_factor = 1.0
 alignment_factor = 0.0
 
 [vec]
@@ -22,8 +20,8 @@ num_envs = 2
 batch_size = auto
 
 [train]
-total_timesteps = 150_000_000
-; total_timesteps = 15_000_000
+; total_timesteps = 150_000_000
+total_timesteps = 50_000_000
 gamma = 0.95
 learning_rate = 0.025
 minibatch_size = 16384
\ No newline at end of file

From 997e63d07ff13a8a21e5a4cea59543acbd5cb542 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 11 Jun 2025 21:59:46 -0400
Subject: [PATCH 28/82] update boids.c observations allocation

---
 ocean/boids/boids.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ocean/boids/boids.c b/ocean/boids/boids.c
index 8718e70065..ef522c3df1 100644
--- a/ocean/boids/boids.c
+++ b/ocean/boids/boids.c
@@ -38,9 +38,7 @@ void demo() {
     env.seperation_factor = SEPERATION_FACTOR;
     env.alignment_factor = ALIGNMENT_FACTOR;
     
-    // In the Python binding, these pointers are assigned from NumPy arrays.
-    // Here, we need to allocate them explicitly.
-    size_t obs_size = env.num_boids * env.num_boids * 4; // the 4 = (x, y, vx, vy)
+    size_t obs_size = env.num_boids * env.num_boids * 8; // 8 = (x, y, vx, vy, dx, dy, dvx, dvy)
     size_t act_size = env.num_boids * 2; // the 2 = (dvx, dvy)
     env.observations = (float*)calloc(obs_size, sizeof(float));
     env.actions = (float*)calloc(act_size, sizeof(float));

From e8641fa6ae8274600b181a141f7d222c9f57081c Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 11 Jun 2025 22:00:04 -0400
Subject: [PATCH 29/82] update observations and actions comments

---
 ocean/boids/boids.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index b883d72246..547b62e4a8 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -39,9 +39,12 @@ typedef struct {
 
 typedef struct Client Client;
 typedef struct {
-    // an array of shape (num_boids, 4) with the 4 values correspoinding to (x, y, velocity x, velocity y)
+    // Flat array of shape (num_boids * 8) values:
+    // - Each boid has 8 values corresponding to (x, y, vx, vy, dx, dy, dvx, dvy)
+    // - The first 8 values are for the boid itself
+    // - All the other 8 values for the other boids
     float* observations;
-    // an array of shape (num_boids, 2) with the 2 values correspoinding to (velocity x, velocity y)
+    // an array of shape (num_boids, 2) with the 2 values correspoinding to (dvx, dvy)
     float* actions;
     // an array of shape (1) with the summed up reward for all boids
     float* rewards;

From 837ae37dde38e1c2e04962846cee83ca162e422f Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 4 Jul 2025 01:39:29 -0400
Subject: [PATCH 30/82] remove commented parameters and update parameters to
 current best

---
 config/boids.ini | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index ca757d9cf1..cf03a8aa8d 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -1,18 +1,16 @@
 [base]
 package = ocean
 env_name = puffer_boids 
-; policy_name = Boids
 policy_name = Policy
 rnn_name = Recurrent
-; rnn_name = None
 
 [env]
 num_envs = 64
 num_boids = 64
-margin_turn_factor = 0.0
-cohesion_factor = 0.0
-seperation_factor = 1.0
-alignment_factor = 0.0
+margin_turn_factor = 2.0
+cohesion_factor = 0.0048
+separation_factor = 0.0128
+alignment_factor = 0.2
 
 [vec]
 num_workers = 2
@@ -20,8 +18,7 @@ num_envs = 2
 batch_size = auto
 
 [train]
-; total_timesteps = 150_000_000
-total_timesteps = 50_000_000
+total_timesteps = 100_000_000
 gamma = 0.95
 learning_rate = 0.025
 minibatch_size = 16384
\ No newline at end of file

From 67481cc9d9f0c0d11ddc7aa394e74e0815d521bf Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 4 Jul 2025 01:41:22 -0400
Subject: [PATCH 31/82] fix to "separation_factor" instead of
 "seperation_factor"

---
 ocean/boids/binding.c          | 2 +-
 ocean/boids/boids.c            | 4 ++--
 ocean/boids/boids.h            | 2 +-
 pufferlib/ocean/boids/boids.py | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ocean/boids/binding.c b/ocean/boids/binding.c
index 89cb389759..d8e5820169 100644
--- a/ocean/boids/binding.c
+++ b/ocean/boids/binding.c
@@ -8,7 +8,7 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
     env->report_interval = unpack(kwargs, "report_interval");
     env->margin_turn_factor = unpack(kwargs, "margin_turn_factor");
     env->cohesion_factor = unpack(kwargs, "cohesion_factor");
-    env->seperation_factor = unpack(kwargs, "seperation_factor");
+    env->separation_factor = unpack(kwargs, "separation_factor");
     env->alignment_factor = unpack(kwargs, "alignment_factor");
     init(env);
     return 0;
diff --git a/ocean/boids/boids.c b/ocean/boids/boids.c
index ef522c3df1..4e4937666c 100644
--- a/ocean/boids/boids.c
+++ b/ocean/boids/boids.c
@@ -12,7 +12,7 @@
 #define ACTION_SCALE 3.0f   // Corresponds to action space [-3.0, 3.0]
 #define MARGIN_TURN_FACTOR 1.0
 #define COHESION_FACTOR 0.0
-#define SEPERATION_FACTOR 0.0
+#define SEPARATION_FACTOR 0.0
 #define ALIGNMENT_FACTOR 0.0
 
 // Dummy action generation: random velocity changes for each boid
@@ -35,7 +35,7 @@ void demo() {
     env.report_interval = REPORT_INTERVAL_DEMO;
     env.margin_turn_factor = MARGIN_TURN_FACTOR;
     env.cohesion_factor = COHESION_FACTOR;
-    env.seperation_factor = SEPERATION_FACTOR;
+    env.separation_factor = SEPARATION_FACTOR;
     env.alignment_factor = ALIGNMENT_FACTOR;
     
     size_t obs_size = env.num_boids * env.num_boids * 8; // 8 = (x, y, vx, vy, dx, dy, dvx, dvy)
diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 547b62e4a8..8c17f8712f 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -53,7 +53,7 @@ typedef struct {
     unsigned int num_boids;
     float margin_turn_factor;
     float cohesion_factor;
-    float seperation_factor;
+    float separation_factor;
     float alignment_factor;
     unsigned tick;
     Log log;
diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py
index 5582cec81f..6eadcc8627 100644
--- a/pufferlib/ocean/boids/boids.py
+++ b/pufferlib/ocean/boids/boids.py
@@ -19,7 +19,7 @@ def __init__(
         num_boids=1,
         margin_turn_factor=1.0,
         cohesion_factor=0.0,
-        seperation_factor=0.0,
+        separation_factor=0.0,
         alignment_factor=0.0
     ):
         ACTION_SPACE_SIZE = 2
@@ -58,7 +58,7 @@ def __init__(
                 report_interval=self.report_interval,
                 margin_turn_factor=margin_turn_factor,
                 cohesion_factor=cohesion_factor,
-                seperation_factor=seperation_factor,
+                separation_factor=separation_factor,
                 alignment_factor=alignment_factor,
             ))
         

From 1511dbe201234c4f4c17b7b367bef6ae2da7d744 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 4 Jul 2025 01:42:33 -0400
Subject: [PATCH 32/82] update preset env parameters

---
 ocean/boids/boids.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 8c17f8712f..460b4531b9 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -8,13 +8,13 @@
 
 #include "raylib.h"
 
-#define TOP_MARGIN 100
-#define BOTTOM_MARGIN 100
-#define LEFT_MARGIN 100
-#define RIGHT_MARGIN 100
+#define TOP_MARGIN 50
+#define BOTTOM_MARGIN 50
+#define LEFT_MARGIN 50
+#define RIGHT_MARGIN 50
 #define VELOCITY_CAP 5
 #define VISUAL_RANGE 400
-#define PROTECTED_RANGE 100
+#define PROTECTED_RANGE 60
 #define WIDTH 1080
 #define HEIGHT 720
 #define BOID_WIDTH 32

From 24679c3061db4c181c2b41388bebb5c6b2ae0d5f Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 4 Jul 2025 01:42:50 -0400
Subject: [PATCH 33/82] condense controlled boid observation loop

---
 ocean/boids/boids.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 460b4531b9..f84e25b4c6 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -102,9 +102,7 @@ static void compute_observations(Boids *env) {
         env->observations[idx++] = env->boids[i].y / HEIGHT;
         env->observations[idx++] = env->boids[i].velocity.x / VELOCITY_CAP;
         env->observations[idx++] = env->boids[i].velocity.y / VELOCITY_CAP;
-        for (unsigned j=0; j<4; j++) {
-            env->observations[idx++] = 0;
-        }
+        for (unsigned j=0; j<4; j++) { env->observations[idx++] = 0; }
         for (unsigned j=0; j<env->num_boids; j++) {
             if (i == j) continue;
             env->observations[idx++] = env->boids[j].x / WIDTH;

From f1caeddcbb6d58fab0c79dd64b95a1cf96d68fce Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sun, 6 Jul 2025 23:15:44 -0400
Subject: [PATCH 34/82] remove use of protected range diff

---
 ocean/boids/boids.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index f84e25b4c6..8b45dd463a 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -130,7 +130,7 @@ void c_step(Boids *env) {
     Boid* current_boid;
     Boid observed_boid;
     float vis_vx_sum, vis_vy_sum, vis_x_sum, vis_y_sum, vis_x_avg, vis_y_avg, vis_vx_avg, vis_vy_avg;
-    float diff_x, diff_y, dist, current_boid_reward, protected_range_diff;
+    float diff_x, diff_y, dist, current_boid_reward;
     unsigned visual_count, protected_count;
     bool manual_control = IsKeyDown(KEY_LEFT_SHIFT);
     float mouse_x = (float)GetMouseX();
@@ -173,8 +173,10 @@ void c_step(Boids *env) {
             }
         }
         if (protected_count > 0) {
-            protected_range_diff = (float)(env->num_boids - protected_count) - protected_count;
-            current_boid_reward += protected_range_diff * env->seperation_factor;
+            // protected_range_diff = (float)(env->num_boids - protected_count) - protected_count;
+            // current_boid_reward += protected_range_diff * env->seperation_factor;
+
+            current_boid_reward -= protected_count * env->separation_factor;
         }
         if (visual_count) {
             vis_x_avg  = vis_x_sum  / visual_count;

From 4f1656a7c384a3ee940c06f952df60497abea794 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Mon, 7 Jul 2025 00:36:54 -0400
Subject: [PATCH 35/82] change reward normalization number

---
 ocean/boids/boids.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 8b45dd463a..f7391126d2 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -197,7 +197,10 @@ void c_step(Boids *env) {
         }
 
         // Normalization
-        env->rewards[current_indx] = current_boid_reward / 2.0f;
+        // env->rewards[current_indx] = current_boid_reward;
+        env->rewards[current_indx] = current_boid_reward / 6.0f;
+        // env->rewards[current_indx] = current_boid_reward / 205.0f;
+        // env->rewards[current_indx] = current_boid_reward / 10.0f;
 
         //log updates
         if (env->tick == env->report_interval) {

From 0fe8839993acc450d45658ce859d8277bdc994ab Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Tue, 8 Jul 2025 19:52:52 -0400
Subject: [PATCH 36/82] update puffer resource path

---
 ocean/boids/boids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index f7391126d2..9e9b87db89 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -19,7 +19,7 @@
 #define HEIGHT 720
 #define BOID_WIDTH 32
 #define BOID_HEIGHT 32
-#define BOID_TEXTURE_PATH "./resources/puffers_128.png"
+#define BOID_TEXTURE_PATH "./resources/shared/puffers_128.png"
 
 typedef struct {
     float score;

From bd4827c1117b92e35d6f08bdde28d04c0e49dc03 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sun, 27 Jul 2025 16:30:53 -0400
Subject: [PATCH 37/82] enable all factors

---
 config/boids.ini | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index cf03a8aa8d..683d8e9189 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -6,8 +6,8 @@ rnn_name = Recurrent
 
 [env]
 num_envs = 64
-num_boids = 64
-margin_turn_factor = 2.0
+num_boids = 16
+margin_turn_factor = 1.0
 cohesion_factor = 0.0048
 separation_factor = 0.0128
 alignment_factor = 0.2
@@ -19,6 +19,7 @@ batch_size = auto
 
 [train]
 total_timesteps = 100_000_000
+; total_timesteps = 80_000_000
 gamma = 0.95
 learning_rate = 0.025
 minibatch_size = 16384
\ No newline at end of file

From aa85ccc56ded79209eba7f6a78341ddb29971bc2 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sun, 27 Jul 2025 16:31:39 -0400
Subject: [PATCH 38/82] add euclidean distance to observations

---
 ocean/boids/boids.h            | 17 ++++++++++++++---
 pufferlib/ocean/boids/boids.py |  2 +-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 9e9b87db89..a56f489437 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -20,6 +20,7 @@
 #define BOID_WIDTH 32
 #define BOID_HEIGHT 32
 #define BOID_TEXTURE_PATH "./resources/shared/puffers_128.png"
+#define MAX_DIST 2000
 
 typedef struct {
     float score;
@@ -97,20 +98,30 @@ void init(Boids *env) {
 
 static void compute_observations(Boids *env) {
     int idx = 0;
+    float diff_x, diff_y, dist;
     for (unsigned i=0; i<env->num_boids; i++) {
+        // observations for the current boid
         env->observations[idx++] = env->boids[i].x / WIDTH;
         env->observations[idx++] = env->boids[i].y / HEIGHT;
         env->observations[idx++] = env->boids[i].velocity.x / VELOCITY_CAP;
         env->observations[idx++] = env->boids[i].velocity.y / VELOCITY_CAP;
-        for (unsigned j=0; j<4; j++) { env->observations[idx++] = 0; }
+        // zeros for relative observations since comparing to itself will always be 0
+        for (unsigned j=0; j<5; j++) { env->observations[idx++] = 0; }
+
+        // observations for the other boids compared to the current boid
         for (unsigned j=0; j<env->num_boids; j++) {
             if (i == j) continue;
+            diff_x = env->boids[i].x - env->boids[j].x;
+            diff_y = env->boids[i].y - env->boids[j].y;
+            dist = sqrtf(diff_x*diff_x + diff_y*diff_y);
+
             env->observations[idx++] = env->boids[j].x / WIDTH;
             env->observations[idx++] = env->boids[j].y / HEIGHT;
             env->observations[idx++] = env->boids[j].velocity.x / VELOCITY_CAP;
             env->observations[idx++] = env->boids[j].velocity.y / VELOCITY_CAP;
-            env->observations[idx++] = (env->boids[i].x - env->boids[j].x) / WIDTH;
-            env->observations[idx++] = (env->boids[i].y - env->boids[j].y) / HEIGHT;
+            env->observations[idx++] = diff_x / WIDTH;
+            env->observations[idx++] = diff_y / HEIGHT;
+            env->observations[idx++] = dist / MAX_DIST;
             env->observations[idx++] = (env->boids[i].velocity.x - env->boids[j].velocity.x) / VELOCITY_CAP;
             env->observations[idx++] = (env->boids[i].velocity.y - env->boids[j].velocity.y) / VELOCITY_CAP;
         }
diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py
index 6eadcc8627..5e9b522226 100644
--- a/pufferlib/ocean/boids/boids.py
+++ b/pufferlib/ocean/boids/boids.py
@@ -27,7 +27,7 @@ def __init__(
         self.num_boids = num_boids
 
         self.single_observation_space = gymnasium.spaces.Box(
-            -1000.0, 1000.0, shape=(num_boids*8,), dtype=np.float32
+            -1000.0, 1000.0, shape=(num_boids*9,), dtype=np.float32
         )
         
         #self.single_action_space = gymnasium.spaces.Box(

From 959618178261101c6da49fa9644cb6f9a64c3229 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sun, 27 Jul 2025 16:34:44 -0400
Subject: [PATCH 39/82] add euclidean distance to local build observations

---
 ocean/boids/boids.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocean/boids/boids.c b/ocean/boids/boids.c
index 4e4937666c..0573cb9735 100644
--- a/ocean/boids/boids.c
+++ b/ocean/boids/boids.c
@@ -38,7 +38,7 @@ void demo() {
     env.separation_factor = SEPARATION_FACTOR;
     env.alignment_factor = ALIGNMENT_FACTOR;
     
-    size_t obs_size = env.num_boids * env.num_boids * 8; // 8 = (x, y, vx, vy, dx, dy, dvx, dvy)
+    size_t obs_size = env.num_boids * env.num_boids * 9; // 9 = (x, y, vx, vy, dx, dy, dist, dvx, dvy)
     size_t act_size = env.num_boids * 2; // the 2 = (dvx, dvy)
     env.observations = (float*)calloc(obs_size, sizeof(float));
     env.actions = (float*)calloc(act_size, sizeof(float));

From 98fd5de301e4b6f18bd78332981578cfa27193aa Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 4 Apr 2026 13:37:08 -0400
Subject: [PATCH 40/82] move boids config to new 4.0 location

---
 config/boids.ini | 60 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index 683d8e9189..0e6c8d33e6 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -1,16 +1,20 @@
 [base]
 package = ocean
 env_name = puffer_boids 
+; policy_name = Boids
 policy_name = Policy
 rnn_name = Recurrent
+; rnn_name = None
 
 [env]
 num_envs = 64
-num_boids = 16
+num_boids = 64
+; num_envs = 1
+; num_boids = 1
 margin_turn_factor = 1.0
-cohesion_factor = 0.0048
-separation_factor = 0.0128
-alignment_factor = 0.2
+cohesion_factor = 0.0
+seperation_factor = 0.0
+alignment_factor = 0.0
 
 [vec]
 num_workers = 2
@@ -19,7 +23,51 @@ batch_size = auto
 
 [train]
 total_timesteps = 100_000_000
-; total_timesteps = 80_000_000
 gamma = 0.95
 learning_rate = 0.025
-minibatch_size = 16384
\ No newline at end of file
+minibatch_size = 16384
+; minibatch_size = 1
+
+; [sweep]
+; method = protein
+; metric = episode_length
+
+; [sweep.train.total_timesteps]
+; distribution = log_normal
+; min = 1e6
+; max = 1e7
+; mean = 5e6
+; scale = 0.5
+
+; [sweep.train.gamma]
+; distribution = log_normal
+; min = 0.9
+; max = 0.999
+; mean = 0.97
+
+; [sweep.train.gae_lambda]
+; distribution = log_normal
+; min = 0.7
+; max = 0.999
+; mean = 0.95
+
+; [sweep.train.learning_rate]
+; distribution = log_normal
+; min = 0.0001
+; max = 0.001
+; mean = 0.00025
+; scale = 0.5
+
+; [sweep.train.batch_size]
+; min = 32768
+; max = 131072
+; mean = 65536
+; scale = 0.5
+
+; [sweep.train.minibatch_size]
+; min = 512
+; max = 2048
+; mean = 1024
+; scale = 0.5
+
+

From 55f772ee26f7f73d1f104812db6eb1d7b9a8304e Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 11 Apr 2026 15:58:55 -0400
Subject: [PATCH 41/82] remove legacy boids.py

---
 pufferlib/ocean/boids/boids.py | 117 ---------------------------------
 1 file changed, 117 deletions(-)
 delete mode 100644 pufferlib/ocean/boids/boids.py

diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py
deleted file mode 100644
index 5e9b522226..0000000000
--- a/pufferlib/ocean/boids/boids.py
+++ /dev/null
@@ -1,117 +0,0 @@
-'''
-High-perf Boids
-Inspired by https://people.ece.cornell.edu/land/courses/ece4760/labs/s2021/Boids/Boids.html
-'''
-
-import numpy as np
-import gymnasium
-import pufferlib
-from pufferlib.ocean.boids import binding
-
-class Boids(pufferlib.PufferEnv):
-    def __init__(
-        self,
-        num_envs=1,
-        buf=None,
-        render_mode=None,
-        seed=0,
-        report_interval=1,
-        num_boids=1,
-        margin_turn_factor=1.0,
-        cohesion_factor=0.0,
-        separation_factor=0.0,
-        alignment_factor=0.0
-    ):
-        ACTION_SPACE_SIZE = 2
-        self.num_agents = num_envs * num_boids
-        self.num_boids = num_boids
-
-        self.single_observation_space = gymnasium.spaces.Box(
-            -1000.0, 1000.0, shape=(num_boids*9,), dtype=np.float32
-        )
-        
-        #self.single_action_space = gymnasium.spaces.Box(
-        #    -np.inf, np.inf, shape=(ACTION_SPACE_SIZE,), dtype=np.float32
-        #)
-        self.single_action_space = gymnasium.spaces.MultiDiscrete([5, 5])
-
-        self.render_mode = render_mode
-        self.report_interval = report_interval
-
-        super().__init__(buf)
-        self.actions = self.actions.astype(np.float32)
-
-        # Create C binding with flattened action buffer
-        # We need to manually create a flattened action buffer to pass to C
-        #self.flat_actions = np.zeros((self.num_agents * ACTION_SPACE_SIZE), dtype=np.float32)
-        
-        c_envs = []
-        for env_num in range(num_envs):
-            c_envs.append(binding.env_init(
-                self.observations[env_num*num_boids:(env_num+1)*num_boids],
-                self.actions[env_num*num_boids:(env_num+1)*num_boids],
-                self.rewards[env_num*num_boids:(env_num+1)*num_boids],
-                self.terminals[env_num*num_boids:(env_num+1)*num_boids],
-                self.truncations[env_num*num_boids:(env_num+1)*num_boids],
-                seed,
-                num_boids=num_boids,
-                report_interval=self.report_interval,
-                margin_turn_factor=margin_turn_factor,
-                cohesion_factor=cohesion_factor,
-                separation_factor=separation_factor,
-                alignment_factor=alignment_factor,
-            ))
-        
-        self.c_envs = binding.vectorize(*c_envs)
-
-    def reset(self, seed=0):
-        self.tick = 0
-        binding.vec_reset(self.c_envs, seed)
-        return self.observations, []
-
-    def step(self, actions):
-        # Clip actions to valid range
-        clipped_actions = (actions.astype(np.float32) - 2.0) / 4.0
-        
-        # Save the original actions for the experience buffer
-        # TODO: Same thing with this
-        self.actions[:] = clipped_actions
-        
-        self.tick += 1
-        binding.vec_step(self.c_envs)
-
-        info = []
-        if self.tick % self.report_interval == 0:
-            log_data = binding.vec_log(self.c_envs)
-            if log_data:
-                info.append(log_data)
-
-        return (self.observations, self.rewards,
-            self.terminals, self.truncations, info)
-
-    def render(self):
-        binding.vec_render(self.c_envs, 0)
-
-    def close(self):
-        binding.vec_close(self.c_envs)
-
-def test_performance(timeout=10, atn_cache=1024):
-    env = Boids(num_envs=1000)
-    env.reset()
-    tick = 0
-
-    # Generate random actions with proper shape: [cache_size, num_agents, action_dim]
-    actions = np.random.uniform(-3.0, 3.0, (atn_cache, env.num_agents, 2))
-
-    import time
-    start = time.time()
-    while time.time() - start < timeout:
-        atn = actions[tick % atn_cache]
-        env.step(atn)
-        tick += 1
-
-    print(f'SPS: {env.num_agents * tick / (time.time() - start)}')
-
-
-if __name__ == '__main__':
-    test_performance()

From a0e81416a93d3ceadae98ba2c3025c9e444bf092 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 11 Apr 2026 17:13:55 -0400
Subject: [PATCH 42/82] correct most recent factor values

---
 config/boids.ini | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index 0e6c8d33e6..11f931ecd0 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -12,9 +12,9 @@ num_boids = 64
 ; num_envs = 1
 ; num_boids = 1
 margin_turn_factor = 1.0
-cohesion_factor = 0.0
-seperation_factor = 0.0
-alignment_factor = 0.0
+cohesion_factor = 0.0048
+separation_factor = 0.0128
+alignment_factor = 0.2
 
 [vec]
 num_workers = 2

From d1f9ee2d41e65f4bd288cdfeac0ff88e8a219aa5 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 11 Apr 2026 18:15:54 -0400
Subject: [PATCH 43/82] add todo for factor normalization in reward calculation

---
 ocean/boids/boids.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index a56f489437..a23d44d53f 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -165,6 +165,7 @@ void c_step(Boids *env) {
         current_boid->y = flclip(current_boid->y + current_boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);
 
         // reward calculation
+        // TODO: Normalize the differences because applying the factors(will allow easier sweeps)
         current_boid_reward = 0.0f, protected_count = 0.0f;
         visual_count = 0.0f, vis_vx_sum = 0.0f, vis_vy_sum = 0.0f, vis_x_sum = 0.0f, vis_y_sum = 0.0f;
         for (unsigned observed_indx = 0; observed_indx < env->num_boids; observed_indx++) {

From b978ea6e90039758d840543da85c01aac05b5749 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Tue, 14 Apr 2026 16:53:23 -0400
Subject: [PATCH 44/82] fix/update env name

---
 config/boids.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/boids.ini b/config/boids.ini
index 11f931ecd0..70361b257f 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -1,6 +1,6 @@
 [base]
 package = ocean
-env_name = puffer_boids 
+env_name = boids 
 ; policy_name = Boids
 policy_name = Policy
 rnn_name = Recurrent

From 2be7efec4acfa7cc03cdca30bab4437daada4b48 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Tue, 14 Apr 2026 17:07:25 -0400
Subject: [PATCH 45/82] converting env names to vecenv compatible naming
 schemes

---
 config/boids.ini      |  5 +++--
 ocean/boids/binding.c | 28 +++++++++++++++-------------
 ocean/boids/boids.c   | 33 +++++++++++++++------------------
 ocean/boids/boids.h   | 38 ++++++++++++++++++--------------------
 4 files changed, 51 insertions(+), 53 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index 70361b257f..4de810b204 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -8,9 +8,10 @@ rnn_name = Recurrent
 
 [env]
 num_envs = 64
-num_boids = 64
+num_agents = 64
 ; num_envs = 1
-; num_boids = 1
+; num_agents = 1
+report_interval = 1
 margin_turn_factor = 1.0
 cohesion_factor = 0.0048
 separation_factor = 0.0128
diff --git a/ocean/boids/binding.c b/ocean/boids/binding.c
index d8e5820169..3d529ac5e0 100644
--- a/ocean/boids/binding.c
+++ b/ocean/boids/binding.c
@@ -1,21 +1,23 @@
 #include "boids.h"
+#define OBS_SIZE 256 // 20 boids * 8 obs per boid
+#define NUM_ATNS 2   // Two discrete actions per boid
+#define ACT_SIZES {5, 5}
+#define OBS_TENSOR_T FloatTensor
 
 #define Env Boids
-#include "../env_binding.h"
+#include "vecenv.h"
 
-static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
-    env->num_boids = unpack(kwargs, "num_boids");
-    env->report_interval = unpack(kwargs, "report_interval");
-    env->margin_turn_factor = unpack(kwargs, "margin_turn_factor");
-    env->cohesion_factor = unpack(kwargs, "cohesion_factor");
-    env->separation_factor = unpack(kwargs, "separation_factor");
-    env->alignment_factor = unpack(kwargs, "alignment_factor");
+void my_init(Env* env, Dict* kwargs) {
+    env->num_agents = (unsigned int)dict_get(kwargs, "num_agents")->value;
+    env->report_interval = (unsigned)dict_get(kwargs, "report_interval")->value;
+    env->margin_turn_factor = (float)dict_get(kwargs, "margin_turn_factor")->value;
+    env->cohesion_factor = (float)dict_get(kwargs, "cohesion_factor")->value;
+    env->separation_factor = (float)dict_get(kwargs, "separation_factor")->value;
+    env->alignment_factor = (float)dict_get(kwargs, "alignment_factor")->value;
     init(env);
-    return 0;
 }
 
-static int my_log(PyObject* dict, Log* log) {
-    assign_to_dict(dict, "score", log->score);
-    assign_to_dict(dict, "n", log->n);
-    return 0;
+void my_log(Log* log, Dict* out) {
+    dict_set(out, "score", log->score);
+    dict_set(out, "n", log->n);
 }
diff --git a/ocean/boids/boids.c b/ocean/boids/boids.c
index 0573cb9735..e5ff609177 100644
--- a/ocean/boids/boids.c
+++ b/ocean/boids/boids.c
@@ -1,48 +1,45 @@
 // Standalone C demo for Boids environment
-// Compile using: ./scripts/build_ocean.sh boids [local|fast]
+// Compile using: ./scripts/build.sh boids [local|fast]
 // Run with: ./boids
 
+
+// TODO: Make the actions as original boids, not just random action. The demo should show how normal boids works/behaves.
+
 #include <time.h>
 #include "boids.h"
+#include <stdlib.h>
 
 // --- Demo Configuration ---
-#define NUM_BOIDS_DEMO 20   // Number of boids for the standalone demo
+#define num_agents_DEMO 32  // Number of boids for the standalone demo
 #define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo
 #define MAX_STEPS_DEMO 10000 // Max steps per episode in the demo
-#define ACTION_SCALE 3.0f   // Corresponds to action space [-3.0, 3.0]
 #define MARGIN_TURN_FACTOR 1.0
 #define COHESION_FACTOR 0.0
 #define SEPARATION_FACTOR 0.0
 #define ALIGNMENT_FACTOR 0.0
 
-// Dummy action generation: random velocity changes for each boid
+// Dummy action generation: random discrete values in [0, 4] for each boid action dim
 void generate_dummy_actions(Boids* env) {
-    for (unsigned int i = 0; i < env->num_boids; ++i) {
-        // Generate random floats in [-1, 1] range
-        float rand_vx = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f;
-        float rand_vy = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f;
-        
-        // Scale to the action space [-ACTION_SCALE, ACTION_SCALE]
-        env->actions[i * 2 + 0] = rand_vx * ACTION_SCALE;
-        env->actions[i * 2 + 1] = rand_vy * ACTION_SCALE;
+    for (unsigned int i = 0; i < env->num_agents; ++i) {
+        env->actions[i * 2] = rand() % 5;
+        env->actions[i * 2 + 1] = rand() % 5;
     }
 }
 
 void demo() {
-    // Initialize Boids environment struct
     Boids env = {0}; 
-    env.num_boids = NUM_BOIDS_DEMO;
+    env.num_agents = num_agents_DEMO;
     env.report_interval = REPORT_INTERVAL_DEMO;
     env.margin_turn_factor = MARGIN_TURN_FACTOR;
     env.cohesion_factor = COHESION_FACTOR;
     env.separation_factor = SEPARATION_FACTOR;
     env.alignment_factor = ALIGNMENT_FACTOR;
     
-    size_t obs_size = env.num_boids * env.num_boids * 9; // 9 = (x, y, vx, vy, dx, dy, dist, dvx, dvy)
-    size_t act_size = env.num_boids * 2; // the 2 = (dvx, dvy)
+    size_t obs_size = env.num_agents * env.num_agents * 8; // 8 = (x, y, vx, vy, dx, dy, dvx, dvy)
+    size_t act_size = env.num_agents * 2; // the 2 = (dvx, dvy)
     env.observations = (float*)calloc(obs_size, sizeof(float));
     env.actions = (float*)calloc(act_size, sizeof(float));
-    env.rewards = (float*)calloc(env.num_boids, sizeof(float)); // Env-level reward
+    env.rewards = (float*)calloc(env.num_agents, sizeof(float)); // Env-level reward
     
     if (!env.observations || !env.actions || !env.rewards) {
         fprintf(stderr, "ERROR: Failed to allocate memory for demo buffers.\n");
@@ -65,7 +62,7 @@ void demo() {
     c_reset(&env);
     int total_steps = 0;
 
-    printf("Starting Boids demo with %d boids. Press ESC to exit.\n", env.num_boids);
+    printf("Starting Boids demo with %u boids. Press ESC to exit.\n", env.num_agents);
 
     while (!WindowShouldClose() && total_steps < MAX_STEPS_DEMO) { // Raylib function to check if ESC is pressed or window closed
         generate_dummy_actions(&env);
diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index a23d44d53f..3df09c92cc 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -1,7 +1,6 @@
 #include <stdlib.h>
-#include <stdbool.h>
-#include <stdio.h>
 #include <math.h>
+#include <stdio.h>
 #include <string.h>
 #include <limits.h>
 #include <stdbool.h>
@@ -51,7 +50,7 @@ typedef struct {
     float* rewards;
     unsigned char* terminals; // Not being used but is required by env_binding.h
     Boid* boids;
-    unsigned int num_boids;
+    unsigned num_agents;
     float margin_turn_factor;
     float cohesion_factor;
     float separation_factor;
@@ -60,6 +59,7 @@ typedef struct {
     Log log;
     unsigned report_interval;
     Client* client;
+    unsigned rng; // unused but required field for vecenv compatibility
 } Boids;
 
 static inline float flmax(float a, float b) { return a > b ? a : b; }
@@ -75,19 +75,19 @@ static void respawn_boid(Boids *env, unsigned int i) {
 }
 
 void init(Boids *env) {
-    if(env->num_boids < 1) {
-        printf("ERROR: num_boids must be bigger than 0\n");
+    if(env->num_agents < 1) {
+        printf("ERROR: num_agents must be bigger than 0\n");
         exit(1);
     }
     if (env->report_interval < 1) {
         printf("ERROR: report_interval must be bigger than 0\n");
         exit(1);
     }
-    env->boids = (Boid*)calloc(env->num_boids, sizeof(Boid));
+    env->boids = (Boid*)calloc(env->num_agents, sizeof(Boid));
     env->log = (Log){0};
     env->tick = 0;
 
-    for (unsigned current_indx = 0; current_indx < env->num_boids; current_indx++) {
+    for (unsigned current_indx = 0; current_indx < env->num_agents; current_indx++) {
         env->boids[current_indx].x = rndf(LEFT_MARGIN, WIDTH  - RIGHT_MARGIN);
         env->boids[current_indx].y = rndf(BOTTOM_MARGIN, HEIGHT - TOP_MARGIN);
         env->boids[current_indx].velocity.x = 0;
@@ -99,17 +99,17 @@ void init(Boids *env) {
 static void compute_observations(Boids *env) {
     int idx = 0;
     float diff_x, diff_y, dist;
-    for (unsigned i=0; i<env->num_boids; i++) {
+    for (unsigned i=0; i<env->num_agents; i++) {
         // observations for the current boid
         env->observations[idx++] = env->boids[i].x / WIDTH;
         env->observations[idx++] = env->boids[i].y / HEIGHT;
         env->observations[idx++] = env->boids[i].velocity.x / VELOCITY_CAP;
         env->observations[idx++] = env->boids[i].velocity.y / VELOCITY_CAP;
-        // zeros for relative observations since comparing to itself will always be 0
-        for (unsigned j=0; j<5; j++) { env->observations[idx++] = 0; }
+        // zeros for relative observations since comparing to itself will always be 0 (dx, dy, dvx, dvy)
+        for (unsigned j=0; j<4; j++) { env->observations[idx++] = 0; }
 
         // observations for the other boids compared to the current boid
-        for (unsigned j=0; j<env->num_boids; j++) {
+        for (unsigned j=0; j<env->num_agents; j++) {
             if (i == j) continue;
             diff_x = env->boids[i].x - env->boids[j].x;
             diff_y = env->boids[i].y - env->boids[j].y;
@@ -121,7 +121,6 @@ static void compute_observations(Boids *env) {
             env->observations[idx++] = env->boids[j].velocity.y / VELOCITY_CAP;
             env->observations[idx++] = diff_x / WIDTH;
             env->observations[idx++] = diff_y / HEIGHT;
-            env->observations[idx++] = dist / MAX_DIST;
             env->observations[idx++] = (env->boids[i].velocity.x - env->boids[j].velocity.x) / VELOCITY_CAP;
             env->observations[idx++] = (env->boids[i].velocity.y - env->boids[j].velocity.y) / VELOCITY_CAP;
         }
@@ -131,7 +130,7 @@ static void compute_observations(Boids *env) {
 void c_reset(Boids *env) {
     env->log = (Log){0};
     env->tick = 0;
-    for (unsigned boid_indx = 0; boid_indx < env->num_boids; boid_indx++) {
+    for (unsigned boid_indx = 0; boid_indx < env->num_agents; boid_indx++) {
         respawn_boid(env, boid_indx);
     }
     compute_observations(env);
@@ -151,24 +150,23 @@ void c_step(Boids *env) {
     env->rewards[0] = 0;
     env->log.score = 0;
     env->log.n = 0;
-    for (unsigned current_indx = 0; current_indx < env->num_boids; current_indx++) {
+    for (unsigned current_indx = 0; current_indx < env->num_agents; current_indx++) {
         // apply action
         current_boid = &env->boids[current_indx];
         if (manual_control) {
             current_boid->velocity.x = flclip(current_boid->velocity.x + (mouse_x - current_boid->x), -VELOCITY_CAP, VELOCITY_CAP);
             current_boid->velocity.y = flclip(current_boid->velocity.y + (mouse_y - current_boid->y), -VELOCITY_CAP, VELOCITY_CAP);
         } else {
-            current_boid->velocity.x = flclip(current_boid->velocity.x + env->actions[current_indx * 2 + 0], -VELOCITY_CAP, VELOCITY_CAP);
-            current_boid->velocity.y = flclip(current_boid->velocity.y + env->actions[current_indx * 2 + 1], -VELOCITY_CAP, VELOCITY_CAP);
+            current_boid->velocity.x = flclip(current_boid->velocity.x + env->actions[current_indx*2], -VELOCITY_CAP, VELOCITY_CAP);
+            current_boid->velocity.y = flclip(current_boid->velocity.y + env->actions[current_indx*2 + 1], -VELOCITY_CAP, VELOCITY_CAP);
         }
         current_boid->x = flclip(current_boid->x + current_boid->velocity.x, 0, WIDTH  - BOID_WIDTH);
         current_boid->y = flclip(current_boid->y + current_boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);
 
         // reward calculation
-        // TODO: Normalize the differences because applying the factors(will allow easier sweeps)
         current_boid_reward = 0.0f, protected_count = 0.0f;
         visual_count = 0.0f, vis_vx_sum = 0.0f, vis_vy_sum = 0.0f, vis_x_sum = 0.0f, vis_y_sum = 0.0f;
-        for (unsigned observed_indx = 0; observed_indx < env->num_boids; observed_indx++) {
+        for (unsigned observed_indx = 0; observed_indx < env->num_agents; observed_indx++) {
             if (current_indx == observed_indx) continue;
             observed_boid = env->boids[observed_indx];
             diff_x = current_boid->x - observed_boid.x;
@@ -185,7 +183,7 @@ void c_step(Boids *env) {
             }
         }
         if (protected_count > 0) {
-            // protected_range_diff = (float)(env->num_boids - protected_count) - protected_count;
+            // protected_range_diff = (float)(env->num_agents - protected_count) - protected_count;
             // current_boid_reward += protected_range_diff * env->seperation_factor;
 
             current_boid_reward -= protected_count * env->separation_factor;
@@ -287,7 +285,7 @@ void c_render(Boids* env) {
         BeginDrawing();
         ClearBackground((Color){6, 24, 24, 255});
 
-        for (unsigned boid_indx = 0; boid_indx < env->num_boids; boid_indx++) {
+        for (unsigned boid_indx = 0; boid_indx < env->num_agents; boid_indx++) {
             DrawTexturePro(
                 env->client->boid_texture,
                 (Rectangle){

From e0eb22ef934d755aa5d7fe9b17bc2743113a6deb Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Tue, 14 Apr 2026 17:08:18 -0400
Subject: [PATCH 46/82] condense boids struct comments

---
 ocean/boids/boids.h | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 3df09c92cc..b7d73321a8 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -39,16 +39,14 @@ typedef struct {
 
 typedef struct Client Client;
 typedef struct {
-    // Flat array of shape (num_boids * 8) values:
+    // Flat array of shape (num_agents * 8) values:
     // - Each boid has 8 values corresponding to (x, y, vx, vy, dx, dy, dvx, dvy)
     // - The first 8 values are for the boid itself
-    // - All the other 8 values for the other boids
+    // - All the other 8 values are for the other boids
     float* observations;
-    // an array of shape (num_boids, 2) with the 2 values correspoinding to (dvx, dvy)
-    float* actions;
-    // an array of shape (1) with the summed up reward for all boids
-    float* rewards;
-    unsigned char* terminals; // Not being used but is required by env_binding.h
+    float* actions; // size (num_agents, 2->(dvx, dvy)) 
+    float* rewards; // size (num_agents) with per-boid rewards
+    float* terminals;
     Boid* boids;
     unsigned num_agents;
     float margin_turn_factor;

From 72b12813da73ad9a8528b22ea0a2b2a706918901 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Tue, 14 Apr 2026 17:14:06 -0400
Subject: [PATCH 47/82] removing commented unnormalized rewards

---
 ocean/boids/boids.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index b7d73321a8..8cb1b747b9 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -205,7 +205,6 @@ void c_step(Boids *env) {
         }
 
         // Normalization
-        // env->rewards[current_indx] = current_boid_reward;
         env->rewards[current_indx] = current_boid_reward / 6.0f;
         // env->rewards[current_indx] = current_boid_reward / 205.0f;
         // env->rewards[current_indx] = current_boid_reward / 10.0f;

From 4e4e0f3a9f7ea4b5f528e5f8155888fc6097f56c Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Tue, 14 Apr 2026 17:22:01 -0400
Subject: [PATCH 48/82] remove unused dist var

---
 ocean/boids/boids.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 8cb1b747b9..23fd69580f 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -96,7 +96,7 @@ void init(Boids *env) {
 
 static void compute_observations(Boids *env) {
     int idx = 0;
-    float diff_x, diff_y, dist;
+    float diff_x, diff_y;
     for (unsigned i=0; i<env->num_agents; i++) {
         // observations for the current boid
         env->observations[idx++] = env->boids[i].x / WIDTH;
@@ -111,7 +111,6 @@ static void compute_observations(Boids *env) {
             if (i == j) continue;
             diff_x = env->boids[i].x - env->boids[j].x;
             diff_y = env->boids[i].y - env->boids[j].y;
-            dist = sqrtf(diff_x*diff_x + diff_y*diff_y);
 
             env->observations[idx++] = env->boids[j].x / WIDTH;
             env->observations[idx++] = env->boids[j].y / HEIGHT;

From a8bff56a554b2cf44233ff5883f839987aed2f87 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 18 Apr 2026 12:23:39 -0400
Subject: [PATCH 49/82] bring random action back from 4.0

---
 ocean/boids/boids.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/ocean/boids/boids.c b/ocean/boids/boids.c
index e5ff609177..fc84a4bd92 100644
--- a/ocean/boids/boids.c
+++ b/ocean/boids/boids.c
@@ -3,8 +3,6 @@
 // Run with: ./boids
 
 
-// TODO: Make the actions as original boids, not just random action. The demo should show how normal boids works/behaves.
-
 #include <time.h>
 #include "boids.h"
 #include <stdlib.h>
@@ -18,11 +16,12 @@
 #define SEPARATION_FACTOR 0.0
 #define ALIGNMENT_FACTOR 0.0
 
-// Dummy action generation: random discrete values in [0, 4] for each boid action dim
 void generate_dummy_actions(Boids* env) {
     for (unsigned int i = 0; i < env->num_agents; ++i) {
-        env->actions[i * 2] = rand() % 5;
-        env->actions[i * 2 + 1] = rand() % 5;
+        float rand_vx = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f;
+        float rand_vy = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f;
+        env->actions[i * 2 + 0] = rand_vx * ACTION_SCALE;
+        env->actions[i * 2 + 1] = rand_vy * ACTION_SCALE;
     }
 }
 

From 74d8f0d8a2a330643e2d1476ccf176afb1375895 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 18 Apr 2026 12:26:32 -0400
Subject: [PATCH 50/82] add missing constant

---
 ocean/boids/boids.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ocean/boids/boids.c b/ocean/boids/boids.c
index fc84a4bd92..13fb5efa65 100644
--- a/ocean/boids/boids.c
+++ b/ocean/boids/boids.c
@@ -11,6 +11,7 @@
 #define num_agents_DEMO 32  // Number of boids for the standalone demo
 #define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo
 #define MAX_STEPS_DEMO 10000 // Max steps per episode in the demo
+#define ACTION_SCALE 3.0f   // Corresponds to action space [-3.0, 3.0]
 #define MARGIN_TURN_FACTOR 1.0
 #define COHESION_FACTOR 0.0
 #define SEPARATION_FACTOR 0.0

From 1c0fb4e221a26f24321da70e56ef3a61728d53e7 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 18 Apr 2026 12:29:33 -0400
Subject: [PATCH 51/82] remove unused vars assignment and constants

---
 ocean/boids/boids.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/ocean/boids/boids.c b/ocean/boids/boids.c
index 13fb5efa65..f1f978b44a 100644
--- a/ocean/boids/boids.c
+++ b/ocean/boids/boids.c
@@ -12,10 +12,6 @@
 #define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo
 #define MAX_STEPS_DEMO 10000 // Max steps per episode in the demo
 #define ACTION_SCALE 3.0f   // Corresponds to action space [-3.0, 3.0]
-#define MARGIN_TURN_FACTOR 1.0
-#define COHESION_FACTOR 0.0
-#define SEPARATION_FACTOR 0.0
-#define ALIGNMENT_FACTOR 0.0
 
 void generate_dummy_actions(Boids* env) {
     for (unsigned int i = 0; i < env->num_agents; ++i) {
@@ -30,10 +26,6 @@ void demo() {
     Boids env = {0}; 
     env.num_agents = num_agents_DEMO;
     env.report_interval = REPORT_INTERVAL_DEMO;
-    env.margin_turn_factor = MARGIN_TURN_FACTOR;
-    env.cohesion_factor = COHESION_FACTOR;
-    env.separation_factor = SEPARATION_FACTOR;
-    env.alignment_factor = ALIGNMENT_FACTOR;
     
     size_t obs_size = env.num_agents * env.num_agents * 8; // 8 = (x, y, vx, vy, dx, dy, dvx, dvy)
     size_t act_size = env.num_agents * 2; // the 2 = (dvx, dvy)

From 4e4ee92a80f92a74fe63e9277329cf4b37b3fde2 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sun, 19 Apr 2026 10:53:22 -0400
Subject: [PATCH 52/82] fixing rewards in progress

---
 ocean/boids/binding.c |  1 +
 ocean/boids/boids.h   | 72 +++++++++++++++++++++++++++++++++++++------
 2 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/ocean/boids/binding.c b/ocean/boids/binding.c
index 3d529ac5e0..5e5de6b753 100644
--- a/ocean/boids/binding.c
+++ b/ocean/boids/binding.c
@@ -18,6 +18,7 @@ void my_init(Env* env, Dict* kwargs) {
 }
 
 void my_log(Log* log, Dict* out) {
+    dict_set(out, "perf", log->perf);
     dict_set(out, "score", log->score);
     dict_set(out, "n", log->n);
 }
diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 23fd69580f..f4301d92a3 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -20,8 +20,10 @@
 #define BOID_HEIGHT 32
 #define BOID_TEXTURE_PATH "./resources/shared/puffers_128.png"
 #define MAX_DIST 2000
+#define EPS 1e-8f // avoids div by zero in angle calc
 
 typedef struct {
+    float perf;
     float score;
     float n;
 } Log;
@@ -64,6 +66,17 @@ static inline float flmax(float a, float b) { return a > b ? a : b; }
 static inline float flmin(float a, float b) { return a > b ? b : a; }
 static inline float flclip(float x,float lo,float hi) { return flmin(hi,flmax(lo,x)); }
 static inline float rndf(float lo,float hi) { return lo + (float)rand()/(float)RAND_MAX*(hi-lo); }
+static inline float velocity_angle_diff(float ref_x, float ref_y, float actual_x, float actual_y) {
+    float ref_mag = sqrtf(ref_x*ref_x + ref_y*ref_y);
+    float actual_mag = sqrtf(actual_x*actual_x + actual_y*actual_y);
+    float denom, cos_theta;
+
+    if (ref_mag <= EPS && actual_mag <= EPS) return 0.0f;
+
+    denom = ref_mag * actual_mag + EPS;
+    cos_theta = flclip((ref_x*actual_x + ref_y*actual_y) / denom, -1.0f, 1.0f);
+    return acosf(cos_theta);
+}
 
 static void respawn_boid(Boids *env, unsigned int i) {
     env->boids[i].x = rndf(LEFT_MARGIN, WIDTH  - RIGHT_MARGIN);
@@ -138,13 +151,16 @@ void c_step(Boids *env) {
     Boid observed_boid;
     float vis_vx_sum, vis_vy_sum, vis_x_sum, vis_y_sum, vis_x_avg, vis_y_avg, vis_vx_avg, vis_vy_avg;
     float diff_x, diff_y, dist, current_boid_reward;
+    float protected_x_sum, protected_y_sum, separation_x, separation_y;
+    float normal_vx, normal_vy, angle_diff;
     unsigned visual_count, protected_count;
     bool manual_control = IsKeyDown(KEY_LEFT_SHIFT);
     float mouse_x = (float)GetMouseX();
     float mouse_y = (float)GetMouseY();
 
     env->tick++;
-    env->rewards[0] = 0;
+    env->rewards[0] = 0.0;
+    env->log.perf = 0;
     env->log.score = 0;
     env->log.n = 0;
     for (unsigned current_indx = 0; current_indx < env->num_agents; current_indx++) {
@@ -152,7 +168,7 @@ void c_step(Boids *env) {
         current_boid = &env->boids[current_indx];
         if (manual_control) {
             current_boid->velocity.x = flclip(current_boid->velocity.x + (mouse_x - current_boid->x), -VELOCITY_CAP, VELOCITY_CAP);
-            current_boid->velocity.y = flclip(current_boid->velocity.y + (mouse_y - current_boid->y), -VELOCITY_CAP, VELOCITY_CAP);
+           current_boid->velocity.y = flclip(current_boid->velocity.y + (mouse_y - current_boid->y), -VELOCITY_CAP, VELOCITY_CAP);
         } else {
             current_boid->velocity.x = flclip(current_boid->velocity.x + env->actions[current_indx*2], -VELOCITY_CAP, VELOCITY_CAP);
             current_boid->velocity.y = flclip(current_boid->velocity.y + env->actions[current_indx*2 + 1], -VELOCITY_CAP, VELOCITY_CAP);
@@ -161,8 +177,17 @@ void c_step(Boids *env) {
         current_boid->y = flclip(current_boid->y + current_boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);
 
         // reward calculation
-        current_boid_reward = 0.0f, protected_count = 0.0f;
-        visual_count = 0.0f, vis_vx_sum = 0.0f, vis_vy_sum = 0.0f, vis_x_sum = 0.0f, vis_y_sum = 0.0f;
+        current_boid_reward = 0.0f;
+        protected_count = 0;
+        visual_count = 0;
+        vis_vx_sum = 0.0f;
+        vis_vy_sum = 0.0f;
+        vis_x_sum = 0.0f;
+        vis_y_sum = 0.0f;
+        protected_x_sum = 0.0f;
+        protected_y_sum = 0.0f;
+        normal_vx = current_boid->velocity.x;
+        normal_vy = current_boid->velocity.y;
         for (unsigned observed_indx = 0; observed_indx < env->num_agents; observed_indx++) {
             if (current_indx == observed_indx) continue;
             observed_boid = env->boids[observed_indx];
@@ -171,6 +196,8 @@ void c_step(Boids *env) {
             dist = sqrtf(diff_x*diff_x + diff_y*diff_y);
             if (dist < PROTECTED_RANGE) {
                 protected_count++;
+                protected_x_sum += diff_x;
+                protected_y_sum += diff_y;
             } else if (dist < VISUAL_RANGE) {
                 vis_x_sum += observed_boid.x;
                 vis_y_sum += observed_boid.y;
@@ -184,6 +211,10 @@ void c_step(Boids *env) {
             // current_boid_reward += protected_range_diff * env->seperation_factor;
 
             current_boid_reward -= protected_count * env->separation_factor;
+            separation_x = protected_x_sum / protected_count;
+            separation_y = protected_y_sum / protected_count;
+            normal_vx += separation_x * env->separation_factor;
+            normal_vy += separation_y * env->separation_factor;
         }
         if (visual_count) {
             vis_x_avg  = vis_x_sum  / visual_count;
@@ -195,21 +226,44 @@ void c_step(Boids *env) {
             current_boid_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
             current_boid_reward -= fabsf(vis_x_avg  - current_boid->x) * env->cohesion_factor;
             current_boid_reward -= fabsf(vis_y_avg  - current_boid->y) * env->cohesion_factor;
+
+            normal_vx += (vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
+            normal_vy += (vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
+            normal_vx += (vis_x_avg  - current_boid->x) * env->cohesion_factor;
+            normal_vy += (vis_y_avg  - current_boid->y) * env->cohesion_factor;
         }
-        if (current_boid->y < TOP_MARGIN || current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN) {
+        if (current_boid->y < TOP_MARGIN
+            || current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN
+            || current_boid->x < LEFT_MARGIN
+            || current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN
+        ) {
             current_boid_reward -= env->margin_turn_factor;
         }
-        if (current_boid->x < LEFT_MARGIN || current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN) {
-            current_boid_reward -= env->margin_turn_factor;
+
+        if (current_boid->y < TOP_MARGIN) {
+            normal_vy += env->margin_turn_factor;
+        } else if (current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN) {
+            normal_vy -= env->margin_turn_factor;
         }
+        if (current_boid->x < LEFT_MARGIN) {
+            normal_vx += env->margin_turn_factor;
+        } else if (current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN) {
+            normal_vx -= env->margin_turn_factor;
+        }
+
+        normal_vx = flclip(normal_vx, -VELOCITY_CAP, VELOCITY_CAP);
+        normal_vy = flclip(normal_vy, -VELOCITY_CAP, VELOCITY_CAP);
+        angle_diff = velocity_angle_diff(normal_vx, normal_vy, current_boid->velocity.x, current_boid->velocity.y);
+        printf("%f, %f || %f, %f = %f\n", normal_vx, normal_vy, current_boid->velocity.x, current_boid->velocity.y, angle_diff);
 
         // Normalization
-        env->rewards[current_indx] = current_boid_reward / 6.0f;
+        // env->rewards[current_indx] = current_boid_reward / 6.0f;
         // env->rewards[current_indx] = current_boid_reward / 205.0f;
-        // env->rewards[current_indx] = current_boid_reward / 10.0f;
+        env->rewards[current_indx] = current_boid_reward / 10.0f;
 
         //log updates
         if (env->tick == env->report_interval) {
+            env->log.perf           += angle_diff;
             env->log.score          += env->rewards[current_indx];
             env->log.n              += 1.0f;
             env->tick = 0;

From 787b6633e23b83aaf0eb5b3b1408147d656eb150 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 23 Apr 2026 20:54:35 -0400
Subject: [PATCH 53/82] normalize factors

Normalize the separation, alignment, and cohesion rule contributions to
unit vectors scaled by their factors.

Fix the global velocity clip in boids.h to use the actual magnitude of
normal_v and rescale only when it exceeds VELOCITY_CAP (true Option B).
This replaces the previous per-axis flclip and a broken clip that was
dividing by leftover rule_dx/rule_dy values.
---
 ocean/boids/boids.h | 52 ++++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index f4301d92a3..6e166cd0a1 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -151,8 +151,9 @@ void c_step(Boids *env) {
     Boid observed_boid;
     float vis_vx_sum, vis_vy_sum, vis_x_sum, vis_y_sum, vis_x_avg, vis_y_avg, vis_vx_avg, vis_vy_avg;
     float diff_x, diff_y, dist, current_boid_reward;
-    float protected_x_sum, protected_y_sum, separation_x, separation_y;
+    float protected_x_sum, protected_y_sum;
     float normal_vx, normal_vy, angle_diff;
+    float rule_dx, rule_dy, rule_mag;
     unsigned visual_count, protected_count;
     bool manual_control = IsKeyDown(KEY_LEFT_SHIFT);
     float mouse_x = (float)GetMouseX();
@@ -186,8 +187,8 @@ void c_step(Boids *env) {
         vis_y_sum = 0.0f;
         protected_x_sum = 0.0f;
         protected_y_sum = 0.0f;
-        normal_vx = current_boid->velocity.x;
-        normal_vy = current_boid->velocity.y;
+        normal_vx = 0.0f;
+        normal_vy = 0.0f;
         for (unsigned observed_indx = 0; observed_indx < env->num_agents; observed_indx++) {
             if (current_indx == observed_indx) continue;
             observed_boid = env->boids[observed_indx];
@@ -210,11 +211,10 @@ void c_step(Boids *env) {
             // protected_range_diff = (float)(env->num_agents - protected_count) - protected_count;
             // current_boid_reward += protected_range_diff * env->seperation_factor;
 
-            current_boid_reward -= protected_count * env->separation_factor;
-            separation_x = protected_x_sum / protected_count;
-            separation_y = protected_y_sum / protected_count;
-            normal_vx += separation_x * env->separation_factor;
-            normal_vy += separation_y * env->separation_factor;
+            rule_mag = sqrtf(protected_x_sum*protected_x_sum + protected_y_sum*protected_y_sum) + EPS;
+            normal_vx += (protected_x_sum / rule_mag) * env->separation_factor;
+            normal_vy += (protected_y_sum / rule_mag) * env->separation_factor;
+            current_boid_reward -= rule_mag * env->separation_factor;
         }
         if (visual_count) {
             vis_x_avg  = vis_x_sum  / visual_count;
@@ -222,15 +222,22 @@ void c_step(Boids *env) {
             vis_vx_avg = vis_vx_sum / visual_count;
             vis_vy_avg = vis_vy_sum / visual_count;
 
-            current_boid_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
-            current_boid_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
             current_boid_reward -= fabsf(vis_x_avg  - current_boid->x) * env->cohesion_factor;
             current_boid_reward -= fabsf(vis_y_avg  - current_boid->y) * env->cohesion_factor;
 
-            normal_vx += (vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
-            normal_vy += (vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
-            normal_vx += (vis_x_avg  - current_boid->x) * env->cohesion_factor;
-            normal_vy += (vis_y_avg  - current_boid->y) * env->cohesion_factor;
+            rule_dx = vis_vx_avg - current_boid->velocity.x;
+            rule_dy = vis_vy_avg - current_boid->velocity.y;
+            rule_mag = sqrtf(rule_dx*rule_dx + rule_dy*rule_dy) + EPS;
+            normal_vx += (rule_dx / rule_mag) * env->alignment_factor;
+            normal_vy += (rule_dy / rule_mag) * env->alignment_factor;
+            current_boid_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
+            current_boid_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
+
+            rule_dx = vis_x_avg - current_boid->x;
+            rule_dy = vis_y_avg - current_boid->y;
+            rule_mag = sqrtf(rule_dx*rule_dx + rule_dy*rule_dy) + EPS;
+            normal_vx += (rule_dx / rule_mag) * env->cohesion_factor;
+            normal_vy += (rule_dy / rule_mag) * env->cohesion_factor;
         }
         if (current_boid->y < TOP_MARGIN
             || current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN
@@ -244,17 +251,19 @@ void c_step(Boids *env) {
             normal_vy += env->margin_turn_factor;
         } else if (current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN) {
             normal_vy -= env->margin_turn_factor;
-        }
-        if (current_boid->x < LEFT_MARGIN) {
+        } else if (current_boid->x < LEFT_MARGIN) {
             normal_vx += env->margin_turn_factor;
         } else if (current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN) {
             normal_vx -= env->margin_turn_factor;
         }
 
-        normal_vx = flclip(normal_vx, -VELOCITY_CAP, VELOCITY_CAP);
-        normal_vy = flclip(normal_vy, -VELOCITY_CAP, VELOCITY_CAP);
+        float n_mag = sqrtf(normal_vx*normal_vx + normal_vy*normal_vy);
+        if (n_mag > VELOCITY_CAP) {
+            normal_vx = (normal_vx / n_mag) * VELOCITY_CAP;
+            normal_vy = (normal_vy / n_mag) * VELOCITY_CAP;
+        }
         angle_diff = velocity_angle_diff(normal_vx, normal_vy, current_boid->velocity.x, current_boid->velocity.y);
-        printf("%f, %f || %f, %f = %f\n", normal_vx, normal_vy, current_boid->velocity.x, current_boid->velocity.y, angle_diff);
+        printf("%f, %f || %f, %f = %f\n", current_boid->velocity.x, current_boid->velocity.y, normal_vx, normal_vy, angle_diff);
 
         // Normalization
         // env->rewards[current_indx] = current_boid_reward / 6.0f;
@@ -266,10 +275,13 @@ void c_step(Boids *env) {
             env->log.perf           += angle_diff;
             env->log.score          += env->rewards[current_indx];
             env->log.n              += 1.0f;
-            env->tick = 0;
         }
     }
 
+    if (env->tick == env->report_interval) env->tick = 0;
+    printf("===================================================================================\n");
+    printf("===================================================================================\n");
+    printf("===================================================================================\n");
     compute_observations(env);
 }
 

From 31e580a9cb373e80828bdd23545169d704066169 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 24 Apr 2026 18:48:49 -0400
Subject: [PATCH 54/82] improve commented sweep config, enable it

---
 config/boids.ini | 75 +++++++++++++++++++++---------------------------
 1 file changed, 33 insertions(+), 42 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index 4de810b204..b0c5fcd316 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -10,7 +10,7 @@ rnn_name = Recurrent
 num_envs = 64
 num_agents = 64
 ; num_envs = 1
-; num_agents = 1
+; num_agents = 5
 report_interval = 1
 margin_turn_factor = 1.0
 cohesion_factor = 0.0048
@@ -18,9 +18,9 @@ separation_factor = 0.0128
 alignment_factor = 0.2
 
 [vec]
-num_workers = 2
-num_envs = 2
-batch_size = auto
+total_agents = 4096
+num_buffers = 8
+num_threads = 8
 
 [train]
 total_timesteps = 100_000_000
@@ -29,46 +29,37 @@ learning_rate = 0.025
 minibatch_size = 16384
 ; minibatch_size = 1
 
-; [sweep]
-; method = protein
-; metric = episode_length
+[sweep]
+method = Protein
+metric = perf
+sweep_only = margin_turn_factor, cohesion_factor, separation_factor, alignment_factor
 
-; [sweep.train.total_timesteps]
-; distribution = log_normal
-; min = 1e6
-; max = 1e7
-; mean = 5e6
-; scale = 0.5
+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 1e3
+max = 1e7
+scale = time
 
-; [sweep.train.gamma]
-; distribution = log_normal
-; min = 0.9
-; max = 0.999
-; mean = 0.97
+[sweep.env.margin_turn_factor]
+distribution = log_normal
+min = 0.01
+max = 5.0
+scale = auto
 
-; [sweep.train.gae_lambda]
-; distribution = log_normal
-; min = 0.7
-; max = 0.999
-; mean = 0.95
-
-; [sweep.train.learning_rate]
-; distribution = log_normal
-; min = 0.0001
-; max = 0.001
-; mean = 0.00025
-; scale = 0.5
-
-; [sweep.train.batch_size]
-; min = 32768
-; max = 131072
-; mean = 65536
-; scale = 0.5
-
-; [sweep.train.minibatch_size]
-; min = 512
-; max = 2048
-; mean = 1024
-; scale = 0.5
+[sweep.env.cohesion_factor]
+distribution = log_normal
+min = 0.01
+max = 1
+scale = auto
 
+[sweep.env.separation_factor]
+distribution = log_normal
+min = 0.01
+max = 1
+scale = auto
 
+[sweep.env.alignment_factor]
+distribution = log_normal
+min = 0.01
+max = 1
+scale = auto
\ No newline at end of file

From 10ea4357cc3e4006b79118534dc2e1b56a711db7 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 24 Apr 2026 18:49:47 -0400
Subject: [PATCH 55/82] comment debug prints and adjust reward normalization

---
 ocean/boids/boids.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 6e166cd0a1..8fad4a183d 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -263,12 +263,12 @@ void c_step(Boids *env) {
             normal_vy = (normal_vy / n_mag) * VELOCITY_CAP;
         }
         angle_diff = velocity_angle_diff(normal_vx, normal_vy, current_boid->velocity.x, current_boid->velocity.y);
-        printf("%f, %f || %f, %f = %f\n", current_boid->velocity.x, current_boid->velocity.y, normal_vx, normal_vy, angle_diff);
+        // printf("%f, %f || %f, %f = %f\n", current_boid->velocity.x, current_boid->velocity.y, normal_vx, normal_vy, angle_diff);
 
         // Normalization
-        // env->rewards[current_indx] = current_boid_reward / 6.0f;
+        env->rewards[current_indx] = current_boid_reward / 5.0f;
         // env->rewards[current_indx] = current_boid_reward / 205.0f;
-        env->rewards[current_indx] = current_boid_reward / 10.0f;
+        // env->rewards[current_indx] = current_boid_reward / 10.0f;
 
         //log updates
         if (env->tick == env->report_interval) {
@@ -279,9 +279,9 @@ void c_step(Boids *env) {
     }
 
     if (env->tick == env->report_interval) env->tick = 0;
-    printf("===================================================================================\n");
-    printf("===================================================================================\n");
-    printf("===================================================================================\n");
+    // printf("===================================================================================\n");
+    // printf("===================================================================================\n");
+    // printf("===================================================================================\n");
     compute_observations(env);
 }
 

From aa9306949a19827d4791aae251edbaf4822c0107 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 25 Apr 2026 01:17:00 -0400
Subject: [PATCH 56/82] fix binding for 64 boids

---
 ocean/boids/binding.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocean/boids/binding.c b/ocean/boids/binding.c
index 5e5de6b753..354abb9a89 100644
--- a/ocean/boids/binding.c
+++ b/ocean/boids/binding.c
@@ -1,5 +1,5 @@
 #include "boids.h"
-#define OBS_SIZE 256 // 20 boids * 8 obs per boid
+#define OBS_SIZE 512 // 64 boids * 8 obs per boid
 #define NUM_ATNS 2   // Two discrete actions per boid
 #define ACT_SIZES {5, 5}
 #define OBS_TENSOR_T FloatTensor

From aa881edb47948a90e1ab12f46898f8f7fc47919f Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 25 Apr 2026 22:36:20 -0400
Subject: [PATCH 57/82] fixed boids positive-only actions (dumb me)

---
 ocean/boids/boids.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 8fad4a183d..ee08798459 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -169,10 +169,10 @@ void c_step(Boids *env) {
         current_boid = &env->boids[current_indx];
         if (manual_control) {
             current_boid->velocity.x = flclip(current_boid->velocity.x + (mouse_x - current_boid->x), -VELOCITY_CAP, VELOCITY_CAP);
-           current_boid->velocity.y = flclip(current_boid->velocity.y + (mouse_y - current_boid->y), -VELOCITY_CAP, VELOCITY_CAP);
+            current_boid->velocity.y = flclip(current_boid->velocity.y + (mouse_y - current_boid->y), -VELOCITY_CAP, VELOCITY_CAP);
         } else {
-            current_boid->velocity.x = flclip(current_boid->velocity.x + env->actions[current_indx*2], -VELOCITY_CAP, VELOCITY_CAP);
-            current_boid->velocity.y = flclip(current_boid->velocity.y + env->actions[current_indx*2 + 1], -VELOCITY_CAP, VELOCITY_CAP);
+            current_boid->velocity.x = flclip(current_boid->velocity.x + (env->actions[current_indx*2] - 2.0f), -VELOCITY_CAP, VELOCITY_CAP);
+            current_boid->velocity.y = flclip(current_boid->velocity.y + (env->actions[current_indx*2 + 1] - 2.0f), -VELOCITY_CAP, VELOCITY_CAP);
         }
         current_boid->x = flclip(current_boid->x + current_boid->velocity.x, 0, WIDTH  - BOID_WIDTH);
         current_boid->y = flclip(current_boid->y + current_boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);

From 6b21d962ace4ebcfe5486eab49fe5c1383a83eac Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 25 Apr 2026 22:42:32 -0400
Subject: [PATCH 58/82] reduce actions to -1,0,1 (hopefully)

---
 ocean/boids/binding.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocean/boids/binding.c b/ocean/boids/binding.c
index 354abb9a89..8e78c19522 100644
--- a/ocean/boids/binding.c
+++ b/ocean/boids/binding.c
@@ -1,7 +1,7 @@
 #include "boids.h"
 #define OBS_SIZE 512 // 64 boids * 8 obs per boid
 #define NUM_ATNS 2   // Two discrete actions per boid
-#define ACT_SIZES {5, 5}
+#define ACT_SIZES {3, 3}
 #define OBS_TENSOR_T FloatTensor
 
 #define Env Boids

From 0b4d6187e7064eb566ce7b10fa445e9a7c3199cd Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 2 May 2026 03:23:14 -0400
Subject: [PATCH 59/82] Update boid action offset and adjust reward factor
 testing

Action offset changed from 2.0f to 1.0f. Reward
normalization changed from division by 5.0f to 50.0f.
Added floats to Raylib rect/vector params. Updated
default boid config to isolate cohesion.
---
 config/boids.ini    | 12 ++++++++----
 ocean/boids/boids.h | 36 ++++++++++++++++++------------------
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index b0c5fcd316..06a42a5541 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -12,10 +12,14 @@ num_agents = 64
 ; num_envs = 1
 ; num_agents = 5
 report_interval = 1
+; margin_turn_factor = 1.0
+; cohesion_factor = 0.0048
+; separation_factor = 0.0128
+; alignment_factor = 0.02
 margin_turn_factor = 1.0
-cohesion_factor = 0.0048
-separation_factor = 0.0128
-alignment_factor = 0.2
+cohesion_factor = 1.0
+separation_factor = 0.0
+alignment_factor = 0.0
 
 [vec]
 total_agents = 4096
@@ -62,4 +66,4 @@ scale = auto
 distribution = log_normal
 min = 0.01
 max = 1
-scale = auto
\ No newline at end of file
+scale = auto
diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index ee08798459..4645bb043c 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -16,8 +16,8 @@
 #define PROTECTED_RANGE 60
 #define WIDTH 1080
 #define HEIGHT 720
-#define BOID_WIDTH 32
-#define BOID_HEIGHT 32
+#define BOID_WIDTH 32.0f
+#define BOID_HEIGHT 32.0f
 #define BOID_TEXTURE_PATH "./resources/shared/puffers_128.png"
 #define MAX_DIST 2000
 #define EPS 1e-8f // avoids div by zero in angle calc
@@ -46,7 +46,7 @@ typedef struct {
     // - The first 8 values are for the boid itself
     // - All the other 8 values are for the other boids
     float* observations;
-    float* actions; // size (num_agents, 2->(dvx, dvy)) 
+    float* actions; // size (num_agents, 2->(dvx, dvy))
     float* rewards; // size (num_agents) with per-boid rewards
     float* terminals;
     Boid* boids;
@@ -171,8 +171,8 @@ void c_step(Boids *env) {
             current_boid->velocity.x = flclip(current_boid->velocity.x + (mouse_x - current_boid->x), -VELOCITY_CAP, VELOCITY_CAP);
             current_boid->velocity.y = flclip(current_boid->velocity.y + (mouse_y - current_boid->y), -VELOCITY_CAP, VELOCITY_CAP);
         } else {
-            current_boid->velocity.x = flclip(current_boid->velocity.x + (env->actions[current_indx*2] - 2.0f), -VELOCITY_CAP, VELOCITY_CAP);
-            current_boid->velocity.y = flclip(current_boid->velocity.y + (env->actions[current_indx*2 + 1] - 2.0f), -VELOCITY_CAP, VELOCITY_CAP);
+            current_boid->velocity.x = flclip(current_boid->velocity.x + (env->actions[current_indx*2] - 1.0f), -VELOCITY_CAP, VELOCITY_CAP);
+            current_boid->velocity.y = flclip(current_boid->velocity.y + (env->actions[current_indx*2 + 1] - 1.0f), -VELOCITY_CAP, VELOCITY_CAP);
         }
         current_boid->x = flclip(current_boid->x + current_boid->velocity.x, 0, WIDTH  - BOID_WIDTH);
         current_boid->y = flclip(current_boid->y + current_boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);
@@ -266,9 +266,9 @@ void c_step(Boids *env) {
         // printf("%f, %f || %f, %f = %f\n", current_boid->velocity.x, current_boid->velocity.y, normal_vx, normal_vy, angle_diff);
 
         // Normalization
-        env->rewards[current_indx] = current_boid_reward / 5.0f;
+        // env->rewards[current_indx] = current_boid_reward / 5.0f;
         // env->rewards[current_indx] = current_boid_reward / 205.0f;
-        // env->rewards[current_indx] = current_boid_reward / 10.0f;
+        env->rewards[current_indx] = current_boid_reward / 50.0f;
 
         //log updates
         if (env->tick == env->report_interval) {
@@ -307,26 +307,26 @@ void c_close(Boids* env) {
 
 Client* make_client(Boids* env) {
     Client* client = (Client*)calloc(1, sizeof(Client));
-    
+
     client->width = WIDTH;
     client->height = HEIGHT;
-    
+
     InitWindow(WIDTH, HEIGHT, "PufferLib Boids");
     SetTargetFPS(60);
-    
+
     if (!IsWindowReady()) {
         TraceLog(LOG_ERROR, "Window failed to initialize\n");
         free(client);
         return NULL;
     }
-    
+
     client->boid_texture = LoadTexture(BOID_TEXTURE_PATH);
     if (client->boid_texture.id == 0) {
         TraceLog(LOG_ERROR, "Failed to load texture: %s", BOID_TEXTURE_PATH);
         c_close_client(client);
         return NULL;
     }
-    
+
     return client;
 }
 
@@ -338,7 +338,7 @@ void c_render(Boids* env) {
             return;
         }
     }
-    
+
     if (!WindowShouldClose() && IsWindowReady()) {
         if (IsKeyDown(KEY_ESCAPE)) {
             exit(0);
@@ -351,10 +351,10 @@ void c_render(Boids* env) {
             DrawTexturePro(
                 env->client->boid_texture,
                 (Rectangle){
-                    (env->boids[boid_indx].velocity.x > 0) ? 0 : 128,
-                    0,
-                    128,
-                    128,
+                    (env->boids[boid_indx].velocity.x > 0) ? 0.0f : 128.0f,
+                    0.0f,
+                    128.0f,
+                    128.0f,
                 },
                 (Rectangle){
                     env->boids[boid_indx].x,
@@ -362,7 +362,7 @@ void c_render(Boids* env) {
                     BOID_WIDTH,
                     BOID_HEIGHT
                 },
-                (Vector2){0, 0},
+                (Vector2){0.0f, 0.0f},
                 0,
                 WHITE
             );

From b65a0f0cbd9522828a0e94a349ce3333ff552927 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Mon, 4 May 2026 23:34:13 -0400
Subject: [PATCH 60/82] Log per-factor reward contributions for boids

Track margin_turn, cohesion, separation, and alignment reward
contributions separately so each factor's effect is visible in
dashboards/wandb.
---
 ocean/boids/binding.c |  4 ++++
 ocean/boids/boids.h   | 37 ++++++++++++++++++++++++++++---------
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/ocean/boids/binding.c b/ocean/boids/binding.c
index 8e78c19522..bd66d6cfc3 100644
--- a/ocean/boids/binding.c
+++ b/ocean/boids/binding.c
@@ -20,5 +20,9 @@ void my_init(Env* env, Dict* kwargs) {
 void my_log(Log* log, Dict* out) {
     dict_set(out, "perf", log->perf);
     dict_set(out, "score", log->score);
+    dict_set(out, "margin_turn_reward", log->margin_turn_reward);
+    dict_set(out, "cohesion_reward", log->cohesion_reward);
+    dict_set(out, "separation_reward", log->separation_reward);
+    dict_set(out, "alignment_reward", log->alignment_reward);
     dict_set(out, "n", log->n);
 }
diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 4645bb043c..b173a4993f 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -26,6 +26,10 @@ typedef struct {
     float perf;
     float score;
     float n;
+    float margin_turn_reward;
+    float cohesion_reward;
+    float separation_reward;
+    float alignment_reward;
 } Log;
 
 typedef struct {
@@ -151,6 +155,7 @@ void c_step(Boids *env) {
     Boid observed_boid;
     float vis_vx_sum, vis_vy_sum, vis_x_sum, vis_y_sum, vis_x_avg, vis_y_avg, vis_vx_avg, vis_vy_avg;
     float diff_x, diff_y, dist, current_boid_reward;
+    float r_margin_turn, r_cohesion, r_separation, r_alignment;
     float protected_x_sum, protected_y_sum;
     float normal_vx, normal_vy, angle_diff;
     float rule_dx, rule_dy, rule_mag;
@@ -164,6 +169,10 @@ void c_step(Boids *env) {
     env->log.perf = 0;
     env->log.score = 0;
     env->log.n = 0;
+    env->log.margin_turn_reward = 0;
+    env->log.cohesion_reward = 0;
+    env->log.separation_reward = 0;
+    env->log.alignment_reward = 0;
     for (unsigned current_indx = 0; current_indx < env->num_agents; current_indx++) {
         // apply action
         current_boid = &env->boids[current_indx];
@@ -179,6 +188,10 @@ void c_step(Boids *env) {
 
         // reward calculation
         current_boid_reward = 0.0f;
+        r_margin_turn = 0.0f;
+        r_cohesion = 0.0f;
+        r_separation = 0.0f;
+        r_alignment = 0.0f;
         protected_count = 0;
         visual_count = 0;
         vis_vx_sum = 0.0f;
@@ -214,7 +227,7 @@ void c_step(Boids *env) {
             rule_mag = sqrtf(protected_x_sum*protected_x_sum + protected_y_sum*protected_y_sum) + EPS;
             normal_vx += (protected_x_sum / rule_mag) * env->separation_factor;
             normal_vy += (protected_y_sum / rule_mag) * env->separation_factor;
-            current_boid_reward -= rule_mag * env->separation_factor;
+            r_separation -= rule_mag * env->separation_factor;
         }
         if (visual_count) {
             vis_x_avg  = vis_x_sum  / visual_count;
@@ -222,16 +235,16 @@ void c_step(Boids *env) {
             vis_vx_avg = vis_vx_sum / visual_count;
             vis_vy_avg = vis_vy_sum / visual_count;
 
-            current_boid_reward -= fabsf(vis_x_avg  - current_boid->x) * env->cohesion_factor;
-            current_boid_reward -= fabsf(vis_y_avg  - current_boid->y) * env->cohesion_factor;
+            r_cohesion -= fabsf(vis_x_avg  - current_boid->x) * env->cohesion_factor;
+            r_cohesion -= fabsf(vis_y_avg  - current_boid->y) * env->cohesion_factor;
 
             rule_dx = vis_vx_avg - current_boid->velocity.x;
             rule_dy = vis_vy_avg - current_boid->velocity.y;
             rule_mag = sqrtf(rule_dx*rule_dx + rule_dy*rule_dy) + EPS;
             normal_vx += (rule_dx / rule_mag) * env->alignment_factor;
             normal_vy += (rule_dy / rule_mag) * env->alignment_factor;
-            current_boid_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
-            current_boid_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
+            r_alignment -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
+            r_alignment -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
 
             rule_dx = vis_x_avg - current_boid->x;
             rule_dy = vis_y_avg - current_boid->y;
@@ -244,9 +257,11 @@ void c_step(Boids *env) {
             || current_boid->x < LEFT_MARGIN
             || current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN
         ) {
-            current_boid_reward -= env->margin_turn_factor;
+            r_margin_turn -= env->margin_turn_factor;
         }
 
+        current_boid_reward -= r_margin_turn + r_cohesion + r_separation + r_alignment;
+
         if (current_boid->y < TOP_MARGIN) {
             normal_vy += env->margin_turn_factor;
         } else if (current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN) {
@@ -272,9 +287,13 @@ void c_step(Boids *env) {
 
         //log updates
         if (env->tick == env->report_interval) {
-            env->log.perf           += angle_diff;
-            env->log.score          += env->rewards[current_indx];
-            env->log.n              += 1.0f;
+            env->log.perf               += angle_diff;
+            env->log.score              += env->rewards[current_indx];
+            env->log.margin_turn_reward += r_margin_turn;
+            env->log.cohesion_reward    += r_cohesion;
+            env->log.separation_reward  += r_separation;
+            env->log.alignment_reward   += r_alignment;
+            env->log.n                  += 1.0f;
         }
     }
 

From bb4bf8c8a139932ffee34e32a00ac174748dc2c8 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Mon, 4 May 2026 23:40:05 -0400
Subject: [PATCH 61/82] rename reward vars, and compress margin turn factor
 reward calc

reward vars were added to be used by logs
---
 ocean/boids/boids.h | 52 +++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 30 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index b173a4993f..be371d7f46 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -155,7 +155,7 @@ void c_step(Boids *env) {
     Boid observed_boid;
     float vis_vx_sum, vis_vy_sum, vis_x_sum, vis_y_sum, vis_x_avg, vis_y_avg, vis_vx_avg, vis_vy_avg;
     float diff_x, diff_y, dist, current_boid_reward;
-    float r_margin_turn, r_cohesion, r_separation, r_alignment;
+    float margin_turn_reward, cohesion_reward, separation_reward, alignment_reward;
     float protected_x_sum, protected_y_sum;
     float normal_vx, normal_vy, angle_diff;
     float rule_dx, rule_dy, rule_mag;
@@ -188,10 +188,10 @@ void c_step(Boids *env) {
 
         // reward calculation
         current_boid_reward = 0.0f;
-        r_margin_turn = 0.0f;
-        r_cohesion = 0.0f;
-        r_separation = 0.0f;
-        r_alignment = 0.0f;
+        margin_turn_reward = 0.0f;
+        cohesion_reward = 0.0f;
+        separation_reward = 0.0f;
+        alignment_reward = 0.0f;
         protected_count = 0;
         visual_count = 0;
         vis_vx_sum = 0.0f;
@@ -227,7 +227,7 @@ void c_step(Boids *env) {
             rule_mag = sqrtf(protected_x_sum*protected_x_sum + protected_y_sum*protected_y_sum) + EPS;
             normal_vx += (protected_x_sum / rule_mag) * env->separation_factor;
             normal_vy += (protected_y_sum / rule_mag) * env->separation_factor;
-            r_separation -= rule_mag * env->separation_factor;
+            separation_reward -= rule_mag * env->separation_factor;
         }
         if (visual_count) {
             vis_x_avg  = vis_x_sum  / visual_count;
@@ -235,16 +235,16 @@ void c_step(Boids *env) {
             vis_vx_avg = vis_vx_sum / visual_count;
             vis_vy_avg = vis_vy_sum / visual_count;
 
-            r_cohesion -= fabsf(vis_x_avg  - current_boid->x) * env->cohesion_factor;
-            r_cohesion -= fabsf(vis_y_avg  - current_boid->y) * env->cohesion_factor;
+            cohesion_reward -= fabsf(vis_x_avg  - current_boid->x) * env->cohesion_factor;
+            cohesion_reward -= fabsf(vis_y_avg  - current_boid->y) * env->cohesion_factor;
 
             rule_dx = vis_vx_avg - current_boid->velocity.x;
             rule_dy = vis_vy_avg - current_boid->velocity.y;
             rule_mag = sqrtf(rule_dx*rule_dx + rule_dy*rule_dy) + EPS;
             normal_vx += (rule_dx / rule_mag) * env->alignment_factor;
             normal_vy += (rule_dy / rule_mag) * env->alignment_factor;
-            r_alignment -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
-            r_alignment -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
+            alignment_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
+            alignment_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
 
             rule_dx = vis_x_avg - current_boid->x;
             rule_dy = vis_y_avg - current_boid->y;
@@ -252,25 +252,17 @@ void c_step(Boids *env) {
             normal_vx += (rule_dx / rule_mag) * env->cohesion_factor;
             normal_vy += (rule_dy / rule_mag) * env->cohesion_factor;
         }
-        if (current_boid->y < TOP_MARGIN
-            || current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN
-            || current_boid->x < LEFT_MARGIN
-            || current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN
-        ) {
-            r_margin_turn -= env->margin_turn_factor;
-        }
-
-        current_boid_reward -= r_margin_turn + r_cohesion + r_separation + r_alignment;
 
-        if (current_boid->y < TOP_MARGIN) {
+        margin_turn_reward -= env->margin_turn_factor;
+        if (current_boid->y < TOP_MARGIN || current_boid->x < LEFT_MARGIN) {
             normal_vy += env->margin_turn_factor;
-        } else if (current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN) {
+        } else if (
+            current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN
+            || current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN
+        ) {
             normal_vy -= env->margin_turn_factor;
-        } else if (current_boid->x < LEFT_MARGIN) {
-            normal_vx += env->margin_turn_factor;
-        } else if (current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN) {
-            normal_vx -= env->margin_turn_factor;
-        }
+        } else { margin_turn_reward = 0; }
+        current_boid_reward -= margin_turn_reward + cohesion_reward + separation_reward + alignment_reward;
 
         float n_mag = sqrtf(normal_vx*normal_vx + normal_vy*normal_vy);
         if (n_mag > VELOCITY_CAP) {
@@ -289,10 +281,10 @@ void c_step(Boids *env) {
         if (env->tick == env->report_interval) {
             env->log.perf               += angle_diff;
             env->log.score              += env->rewards[current_indx];
-            env->log.margin_turn_reward += r_margin_turn;
-            env->log.cohesion_reward    += r_cohesion;
-            env->log.separation_reward  += r_separation;
-            env->log.alignment_reward   += r_alignment;
+            env->log.margin_turn_reward += margin_turn_reward;
+            env->log.cohesion_reward    += cohesion_reward;
+            env->log.separation_reward  += separation_reward;
+            env->log.alignment_reward   += alignment_reward;
             env->log.n                  += 1.0f;
         }
     }

From 39a9f7926a35d87e3701def047a8ba0c3d9e1b63 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Mon, 4 May 2026 23:47:27 -0400
Subject: [PATCH 62/82] rename rewards for clarification and training is
 screwed again!!!!!!!!!!

---
 ocean/boids/binding.c |  8 ++++----
 ocean/boids/boids.h   | 24 ++++++++++++------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/ocean/boids/binding.c b/ocean/boids/binding.c
index bd66d6cfc3..f6109adea3 100644
--- a/ocean/boids/binding.c
+++ b/ocean/boids/binding.c
@@ -20,9 +20,9 @@ void my_init(Env* env, Dict* kwargs) {
 void my_log(Log* log, Dict* out) {
     dict_set(out, "perf", log->perf);
     dict_set(out, "score", log->score);
-    dict_set(out, "margin_turn_reward", log->margin_turn_reward);
-    dict_set(out, "cohesion_reward", log->cohesion_reward);
-    dict_set(out, "separation_reward", log->separation_reward);
-    dict_set(out, "alignment_reward", log->alignment_reward);
+    dict_set(out, "margin_turn_reward", log->t_margin_turn_reward);
+    dict_set(out, "cohesion_reward", log->t_cohesion_reward);
+    dict_set(out, "separation_reward", log->t_separation_reward);
+    dict_set(out, "alignment_reward", log->t_alignment_reward);
     dict_set(out, "n", log->n);
 }
diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index be371d7f46..69726abbaa 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -26,10 +26,10 @@ typedef struct {
     float perf;
     float score;
     float n;
-    float margin_turn_reward;
-    float cohesion_reward;
-    float separation_reward;
-    float alignment_reward;
+    float t_margin_turn_reward;
+    float t_cohesion_reward;
+    float t_separation_reward;
+    float t_alignment_reward;
 } Log;
 
 typedef struct {
@@ -169,10 +169,10 @@ void c_step(Boids *env) {
     env->log.perf = 0;
     env->log.score = 0;
     env->log.n = 0;
-    env->log.margin_turn_reward = 0;
-    env->log.cohesion_reward = 0;
-    env->log.separation_reward = 0;
-    env->log.alignment_reward = 0;
+    env->log.t_margin_turn_reward = 0;
+    env->log.t_cohesion_reward = 0;
+    env->log.t_separation_reward = 0;
+    env->log.t_alignment_reward = 0;
     for (unsigned current_indx = 0; current_indx < env->num_agents; current_indx++) {
         // apply action
         current_boid = &env->boids[current_indx];
@@ -281,10 +281,10 @@ void c_step(Boids *env) {
         if (env->tick == env->report_interval) {
             env->log.perf               += angle_diff;
             env->log.score              += env->rewards[current_indx];
-            env->log.margin_turn_reward += margin_turn_reward;
-            env->log.cohesion_reward    += cohesion_reward;
-            env->log.separation_reward  += separation_reward;
-            env->log.alignment_reward   += alignment_reward;
+            env->log.t_margin_turn_reward += margin_turn_reward;
+            env->log.t_cohesion_reward    += cohesion_reward;
+            env->log.t_separation_reward  += separation_reward;
+            env->log.t_alignment_reward   += alignment_reward;
             env->log.n                  += 1.0f;
         }
     }

From aef44d66eaad602cb557b3016036993590547797 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Tue, 5 May 2026 23:18:20 -0400
Subject: [PATCH 63/82] fix margin turn factor

---
 ocean/boids/boids.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 69726abbaa..7fa6040232 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -7,17 +7,17 @@
 
 #include "raylib.h"
 
+#define BOID_WIDTH 16.0f
+#define BOID_HEIGHT 16.0f
 #define TOP_MARGIN 50
 #define BOTTOM_MARGIN 50
 #define LEFT_MARGIN 50
 #define RIGHT_MARGIN 50
 #define VELOCITY_CAP 5
 #define VISUAL_RANGE 400
-#define PROTECTED_RANGE 60
+#define PROTECTED_RANGE ((int)(1.5f * BOID_WIDTH))
 #define WIDTH 1080
 #define HEIGHT 720
-#define BOID_WIDTH 32.0f
-#define BOID_HEIGHT 32.0f
 #define BOID_TEXTURE_PATH "./resources/shared/puffers_128.png"
 #define MAX_DIST 2000
 #define EPS 1e-8f // avoids div by zero in angle calc
@@ -261,8 +261,8 @@ void c_step(Boids *env) {
             || current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN
         ) {
             normal_vy -= env->margin_turn_factor;
-        } else { margin_turn_reward = 0; }
-        current_boid_reward -= margin_turn_reward + cohesion_reward + separation_reward + alignment_reward;
+        } else { margin_turn_reward += env->margin_turn_factor; }
+        current_boid_reward = margin_turn_reward + cohesion_reward + separation_reward + alignment_reward;
 
         float n_mag = sqrtf(normal_vx*normal_vx + normal_vy*normal_vy);
         if (n_mag > VELOCITY_CAP) {
@@ -273,9 +273,9 @@ void c_step(Boids *env) {
         // printf("%f, %f || %f, %f = %f\n", current_boid->velocity.x, current_boid->velocity.y, normal_vx, normal_vy, angle_diff);
 
         // Normalization
-        // env->rewards[current_indx] = current_boid_reward / 5.0f;
+        env->rewards[current_indx] = current_boid_reward / 5.0f;
         // env->rewards[current_indx] = current_boid_reward / 205.0f;
-        env->rewards[current_indx] = current_boid_reward / 50.0f;
+        // env->rewards[current_indx] = current_boid_reward / 50.0f;
 
         //log updates
         if (env->tick == env->report_interval) {

From 43e092a91212e3ea7d12aff427a903b4ce6bf0c3 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 6 May 2026 23:14:29 -0400
Subject: [PATCH 64/82] update rewards to 64

---
 ocean/boids/boids.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 7fa6040232..130085e086 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -273,9 +273,9 @@ void c_step(Boids *env) {
         // printf("%f, %f || %f, %f = %f\n", current_boid->velocity.x, current_boid->velocity.y, normal_vx, normal_vy, angle_diff);
 
         // Normalization
-        env->rewards[current_indx] = current_boid_reward / 5.0f;
+        // env->rewards[current_indx] = current_boid_reward / 5.0f;
         // env->rewards[current_indx] = current_boid_reward / 205.0f;
-        // env->rewards[current_indx] = current_boid_reward / 50.0f;
+        env->rewards[current_indx] = current_boid_reward / 64.0f;
 
         //log updates
         if (env->tick == env->report_interval) {

From 09fe18da7edaf5f3ff2ee01afc4bd599c76aceff Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 6 May 2026 23:15:21 -0400
Subject: [PATCH 65/82] good cohesion and separation magnitudes

---
 config/boids.ini | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index 06a42a5541..0e34e093a1 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -1,6 +1,6 @@
 [base]
 package = ocean
-env_name = boids 
+env_name = boids
 ; policy_name = Boids
 policy_name = Policy
 rnn_name = Recurrent
@@ -17,7 +17,7 @@ report_interval = 1
 ; separation_factor = 0.0128
 ; alignment_factor = 0.02
 margin_turn_factor = 1.0
-cohesion_factor = 1.0
+cohesion_factor = 0.001
 separation_factor = 0.0
 alignment_factor = 0.0
 

From fed0611f76d3ec047067769225ae806655ad5bd7 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 8 May 2026 20:36:02 -0400
Subject: [PATCH 66/82] remove unused commented configs

---
 config/boids.ini | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/config/boids.ini b/config/boids.ini
index 0e34e093a1..dd9896ea55 100644
--- a/config/boids.ini
+++ b/config/boids.ini
@@ -1,10 +1,8 @@
 [base]
 package = ocean
 env_name = boids
-; policy_name = Boids
 policy_name = Policy
 rnn_name = Recurrent
-; rnn_name = None
 
 [env]
 num_envs = 64
@@ -31,7 +29,6 @@ total_timesteps = 100_000_000
 gamma = 0.95
 learning_rate = 0.025
 minibatch_size = 16384
-; minibatch_size = 1
 
 [sweep]
 method = Protein

From 73b64f06948f70b3dcf13f0201c3d05d7e25ba29 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 8 May 2026 21:58:16 -0400
Subject: [PATCH 67/82] remove debug prints

---
 ocean/boids/boids.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 130085e086..ca30aef625 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -270,7 +270,6 @@ void c_step(Boids *env) {
             normal_vy = (normal_vy / n_mag) * VELOCITY_CAP;
         }
         angle_diff = velocity_angle_diff(normal_vx, normal_vy, current_boid->velocity.x, current_boid->velocity.y);
-        // printf("%f, %f || %f, %f = %f\n", current_boid->velocity.x, current_boid->velocity.y, normal_vx, normal_vy, angle_diff);
 
         // Normalization
         // env->rewards[current_indx] = current_boid_reward / 5.0f;
@@ -290,9 +289,6 @@ void c_step(Boids *env) {
     }
 
     if (env->tick == env->report_interval) env->tick = 0;
-    // printf("===================================================================================\n");
-    // printf("===================================================================================\n");
-    // printf("===================================================================================\n");
     compute_observations(env);
 }
 

From 3a4c0ec230c1372040e1510aea5661c481600e98 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 8 May 2026 22:39:30 -0400
Subject: [PATCH 68/82] remove unused includes and MAX_DIST

---
 ocean/boids/boids.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index ca30aef625..5bd4b0b2b8 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -1,8 +1,6 @@
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
-#include <string.h>
-#include <limits.h>
 #include <stdbool.h>
 
 #include "raylib.h"
@@ -19,7 +17,6 @@
 #define WIDTH 1080
 #define HEIGHT 720
 #define BOID_TEXTURE_PATH "./resources/shared/puffers_128.png"
-#define MAX_DIST 2000
 #define EPS 1e-8f // avoids div by zero in angle calc
 
 typedef struct {

From a1b8a41083cd30525ebfc8f932221c894aa5393d Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 8 May 2026 23:03:07 -0400
Subject: [PATCH 69/82] clean struct declaration

---
 ocean/boids/boids.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 5bd4b0b2b8..6b6108ba8c 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -40,7 +40,12 @@ typedef struct {
     Velocity velocity;
 } Boid;
 
-typedef struct Client Client;
+typedef struct {
+    float width;
+    float height;
+    Texture2D boid_texture;
+} Client ;
+
 typedef struct {
     // Flat array of shape (num_agents * 8) values:
     // - Each boid has 8 values corresponding to (x, y, vx, vy, dx, dy, dvx, dvy)
@@ -289,13 +294,6 @@ void c_step(Boids *env) {
     compute_observations(env);
 }
 
-typedef struct Client Client;
-struct Client {
-    float width;
-    float height;
-    Texture2D boid_texture;
-};
-
 void c_close_client(Client* client) {
     UnloadTexture(client->boid_texture);
     CloseWindow();

From bf1304a318ab7bc8f8dd3e35c533cc394c871e70 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 8 May 2026 23:45:37 -0400
Subject: [PATCH 70/82] remove duplication and rename indx to idx

---
 ocean/boids/boids.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 6b6108ba8c..a7735f7e66 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -104,11 +104,8 @@ void init(Boids *env) {
     env->log = (Log){0};
     env->tick = 0;
 
-    for (unsigned current_indx = 0; current_indx < env->num_agents; current_indx++) {
-        env->boids[current_indx].x = rndf(LEFT_MARGIN, WIDTH  - RIGHT_MARGIN);
-        env->boids[current_indx].y = rndf(BOTTOM_MARGIN, HEIGHT - TOP_MARGIN);
-        env->boids[current_indx].velocity.x = 0;
-        env->boids[current_indx].velocity.y = 0;
+    for (unsigned current_idx = 0; current_idx < env->num_agents; current_idx++) {
+        respawn_boid(env, current_idx);
     }
 }
 

From 884cb855552e599748350517fcedf88ed8246547 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 8 May 2026 23:50:48 -0400
Subject: [PATCH 71/82] rename respawn to spawn

---
 ocean/boids/boids.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index a7735f7e66..859429d5b5 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -84,7 +84,7 @@ static inline float velocity_angle_diff(float ref_x, float ref_y, float actual_x
     return acosf(cos_theta);
 }
 
-static void respawn_boid(Boids *env, unsigned int i) {
+static void spawn_boid(Boids *env, unsigned int i) {
     env->boids[i].x = rndf(LEFT_MARGIN, WIDTH  - RIGHT_MARGIN);
     env->boids[i].y = rndf(BOTTOM_MARGIN, HEIGHT - TOP_MARGIN);
     env->boids[i].velocity.x = 0;
@@ -105,7 +105,7 @@ void init(Boids *env) {
     env->tick = 0;
 
     for (unsigned current_idx = 0; current_idx < env->num_agents; current_idx++) {
-        respawn_boid(env, current_idx);
+        spawn_boid(env, current_idx);
     }
 }
 
@@ -144,7 +144,7 @@ void c_reset(Boids *env) {
     env->log = (Log){0};
     env->tick = 0;
     for (unsigned boid_indx = 0; boid_indx < env->num_agents; boid_indx++) {
-        respawn_boid(env, boid_indx);
+        spawn_boid(env, boid_indx);
     }
     compute_observations(env);
 }

From 5ad536e4418b3389bba2b0e4190d4a294b0fe9fa Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 9 May 2026 00:02:37 -0400
Subject: [PATCH 72/82] compress init boid spawn loop

---
 ocean/boids/boids.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 859429d5b5..ad900a9f01 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -104,9 +104,7 @@ void init(Boids *env) {
     env->log = (Log){0};
     env->tick = 0;
 
-    for (unsigned current_idx = 0; current_idx < env->num_agents; current_idx++) {
-        spawn_boid(env, current_idx);
-    }
+    for (unsigned idx = 0; idx < env->num_agents; idx++) spawn_boid(env, idx);
 }
 
 

From b34ee4f54fe19fc6f3cc99f0cd78726124a2b50f Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 9 May 2026 00:24:04 -0400
Subject: [PATCH 73/82] unstack c_render function

---
 ocean/boids/boids.h | 59 +++++++++++++++++++++------------------------
 1 file changed, 27 insertions(+), 32 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index ad900a9f01..cf69580118 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -335,38 +335,33 @@ void c_render(Boids* env) {
             return;
         }
     }
-
-    if (!WindowShouldClose() && IsWindowReady()) {
-        if (IsKeyDown(KEY_ESCAPE)) {
-            exit(0);
-        }
-
-        BeginDrawing();
-        ClearBackground((Color){6, 24, 24, 255});
-
-        for (unsigned boid_indx = 0; boid_indx < env->num_agents; boid_indx++) {
-            DrawTexturePro(
-                env->client->boid_texture,
-                (Rectangle){
-                    (env->boids[boid_indx].velocity.x > 0) ? 0.0f : 128.0f,
-                    0.0f,
-                    128.0f,
-                    128.0f,
-                },
-                (Rectangle){
-                    env->boids[boid_indx].x,
-                    env->boids[boid_indx].y,
-                    BOID_WIDTH,
-                    BOID_HEIGHT
-                },
-                (Vector2){0.0f, 0.0f},
-                0,
-                WHITE
-            );
-        }
-
-        EndDrawing();
-    } else {
+    if (WindowShouldClose() || !IsWindowReady()) {
         TraceLog(LOG_WARNING, "Window is not ready or should close");
+        return;
+    }
+    if (IsKeyDown(KEY_ESCAPE)) exit(0);
+
+    BeginDrawing();
+    ClearBackground((Color){6, 24, 24, 255});
+    for (unsigned boid_indx = 0; boid_indx < env->num_agents; boid_indx++) {
+        DrawTexturePro(
+            env->client->boid_texture,
+            (Rectangle){
+                (env->boids[boid_indx].velocity.x > 0) ? 0.0f : 128.0f,
+                0.0f,
+                128.0f,
+                128.0f,
+            },
+            (Rectangle){
+                env->boids[boid_indx].x,
+                env->boids[boid_indx].y,
+                BOID_WIDTH,
+                BOID_HEIGHT
+            },
+            (Vector2){0.0f, 0.0f},
+            0,
+            WHITE
+        );
     }
+    EndDrawing();
 }

From efaf64820be2fe0b3d9217295589ef4bc7b50627 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 9 May 2026 14:38:57 -0400
Subject: [PATCH 74/82] deduplicate boid velocity application

---
 ocean/boids/boids.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index cf69580118..5156daf6e0 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -138,6 +138,13 @@ static void compute_observations(Boids *env) {
     }
 }
 
+void apply_velocity(Boid* boid, float vx, float vy) {
+    boid->velocity.x = flclip(boid->velocity.x + vx, -VELOCITY_CAP, VELOCITY_CAP);
+    boid->velocity.y = flclip(boid->velocity.y + vy, -VELOCITY_CAP, VELOCITY_CAP);
+    boid->x = flclip(boid->x + boid->velocity.x, 0, WIDTH  - BOID_WIDTH);
+    boid->y = flclip(boid->y + boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);
+}
+
 void c_reset(Boids *env) {
     env->log = (Log){0};
     env->tick = 0;
@@ -174,14 +181,10 @@ void c_step(Boids *env) {
         // apply action
         current_boid = &env->boids[current_indx];
         if (manual_control) {
-            current_boid->velocity.x = flclip(current_boid->velocity.x + (mouse_x - current_boid->x), -VELOCITY_CAP, VELOCITY_CAP);
-            current_boid->velocity.y = flclip(current_boid->velocity.y + (mouse_y - current_boid->y), -VELOCITY_CAP, VELOCITY_CAP);
+            apply_velocity(current_boid, (mouse_x - current_boid->x), (mouse_y - current_boid->y));
         } else {
-            current_boid->velocity.x = flclip(current_boid->velocity.x + (env->actions[current_indx*2] - 1.0f), -VELOCITY_CAP, VELOCITY_CAP);
-            current_boid->velocity.y = flclip(current_boid->velocity.y + (env->actions[current_indx*2 + 1] - 1.0f), -VELOCITY_CAP, VELOCITY_CAP);
+            apply_velocity(current_boid, (env->actions[current_indx*2] - 1.0f), (env->actions[current_indx*2 + 1] - 1.0f));
         }
-        current_boid->x = flclip(current_boid->x + current_boid->velocity.x, 0, WIDTH  - BOID_WIDTH);
-        current_boid->y = flclip(current_boid->y + current_boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);
 
         // reward calculation
         current_boid_reward = 0.0f;

From f2324f9fe437a82644afebfd9027f56834d936f8 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sat, 9 May 2026 14:41:26 -0400
Subject: [PATCH 75/82] rename func to apply_action

---
 ocean/boids/boids.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 5156daf6e0..d45bacdcb4 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -138,7 +138,7 @@ static void compute_observations(Boids *env) {
     }
 }
 
-void apply_velocity(Boid* boid, float vx, float vy) {
+void apply_action(Boid* boid, float vx, float vy) {
     boid->velocity.x = flclip(boid->velocity.x + vx, -VELOCITY_CAP, VELOCITY_CAP);
     boid->velocity.y = flclip(boid->velocity.y + vy, -VELOCITY_CAP, VELOCITY_CAP);
     boid->x = flclip(boid->x + boid->velocity.x, 0, WIDTH  - BOID_WIDTH);
@@ -178,12 +178,11 @@ void c_step(Boids *env) {
     env->log.t_separation_reward = 0;
     env->log.t_alignment_reward = 0;
     for (unsigned current_indx = 0; current_indx < env->num_agents; current_indx++) {
-        // apply action
         current_boid = &env->boids[current_indx];
         if (manual_control) {
-            apply_velocity(current_boid, (mouse_x - current_boid->x), (mouse_y - current_boid->y));
+            apply_action(current_boid, (mouse_x - current_boid->x), (mouse_y - current_boid->y));
         } else {
-            apply_velocity(current_boid, (env->actions[current_indx*2] - 1.0f), (env->actions[current_indx*2 + 1] - 1.0f));
+            apply_action(current_boid, (env->actions[current_indx*2] - 1.0f), (env->actions[current_indx*2 + 1] - 1.0f));
         }
 
         // reward calculation

From 33939833d02007a78c524e3ae7e51f4ad5189aca Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sun, 10 May 2026 13:51:25 -0400
Subject: [PATCH 76/82] remove unused commented code

---
 ocean/boids/boids.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index d45bacdcb4..0eb290ca18 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -220,9 +220,6 @@ void c_step(Boids *env) {
             }
         }
         if (protected_count > 0) {
-            // protected_range_diff = (float)(env->num_agents - protected_count) - protected_count;
-            // current_boid_reward += protected_range_diff * env->seperation_factor;
-
             rule_mag = sqrtf(protected_x_sum*protected_x_sum + protected_y_sum*protected_y_sum) + EPS;
             normal_vx += (protected_x_sum / rule_mag) * env->separation_factor;
             normal_vy += (protected_y_sum / rule_mag) * env->separation_factor;

From 21fb325d6cec424017bcb0a10e1ef0ce7e728701 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sun, 10 May 2026 14:17:51 -0400
Subject: [PATCH 77/82] remove useless perf metric

---
 ocean/boids/binding.c | 1 -
 ocean/boids/boids.h   | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/ocean/boids/binding.c b/ocean/boids/binding.c
index f6109adea3..65b4998249 100644
--- a/ocean/boids/binding.c
+++ b/ocean/boids/binding.c
@@ -18,7 +18,6 @@ void my_init(Env* env, Dict* kwargs) {
 }
 
 void my_log(Log* log, Dict* out) {
-    dict_set(out, "perf", log->perf);
     dict_set(out, "score", log->score);
     dict_set(out, "margin_turn_reward", log->t_margin_turn_reward);
     dict_set(out, "cohesion_reward", log->t_cohesion_reward);
diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 0eb290ca18..1cbec4c9d9 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -20,7 +20,6 @@
 #define EPS 1e-8f // avoids div by zero in angle calc
 
 typedef struct {
-    float perf;
     float score;
     float n;
     float t_margin_turn_reward;
@@ -170,7 +169,6 @@ void c_step(Boids *env) {
 
     env->tick++;
     env->rewards[0] = 0.0;
-    env->log.perf = 0;
     env->log.score = 0;
     env->log.n = 0;
     env->log.t_margin_turn_reward = 0;
@@ -274,7 +272,6 @@ void c_step(Boids *env) {
 
         //log updates
         if (env->tick == env->report_interval) {
-            env->log.perf               += angle_diff;
             env->log.score              += env->rewards[current_indx];
             env->log.t_margin_turn_reward += margin_turn_reward;
             env->log.t_cohesion_reward    += cohesion_reward;

From 3a5427f19c68aee8724f15271187074ddc4cf281 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Sun, 10 May 2026 17:28:01 -0400
Subject: [PATCH 78/82] reward func checkpt

---
 ocean/boids/boids.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 1cbec4c9d9..8158cae821 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -45,6 +45,13 @@ typedef struct {
     Texture2D boid_texture;
 } Client ;
 
+typedef struct {
+    float margin_turn;
+    float cohesion;
+    float separation;
+    float alignment;
+} Rewards;
+
 typedef struct {
     // Flat array of shape (num_agents * 8) values:
     // - Each boid has 8 values corresponding to (x, y, vx, vy, dx, dy, dvx, dvy)
@@ -144,6 +151,10 @@ void apply_action(Boid* boid, float vx, float vy) {
     boid->y = flclip(boid->y + boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);
 }
 
+Rewards calc_rewards() {
+    Rewards rewards = {0};
+}
+
 void c_reset(Boids *env) {
     env->log = (Log){0};
     env->tick = 0;

From dfb5e7f3880999d163315916d76ba3c88a1c443f Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Wed, 13 May 2026 21:11:37 -0400
Subject: [PATCH 79/82] remove rewards struct

---
 ocean/boids/boids.h | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 8158cae821..1cbec4c9d9 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -45,13 +45,6 @@ typedef struct {
     Texture2D boid_texture;
 } Client ;
 
-typedef struct {
-    float margin_turn;
-    float cohesion;
-    float separation;
-    float alignment;
-} Rewards;
-
 typedef struct {
     // Flat array of shape (num_agents * 8) values:
     // - Each boid has 8 values corresponding to (x, y, vx, vy, dx, dy, dvx, dvy)
@@ -151,10 +144,6 @@ void apply_action(Boid* boid, float vx, float vy) {
     boid->y = flclip(boid->y + boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);
 }
 
-Rewards calc_rewards() {
-    Rewards rewards = {0};
-}
-
 void c_reset(Boids *env) {
     env->log = (Log){0};
     env->tick = 0;

From e609064e4fab204ef2a63e18c457fb98ee571f96 Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 14 May 2026 01:20:08 -0400
Subject: [PATCH 80/82] remove angle diff and all the dead code it creates

---
 ocean/boids/boids.h | 41 +++++------------------------------------
 1 file changed, 5 insertions(+), 36 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 1cbec4c9d9..0b27dc6a1e 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -71,17 +71,6 @@ static inline float flmax(float a, float b) { return a > b ? a : b; }
 static inline float flmin(float a, float b) { return a > b ? b : a; }
 static inline float flclip(float x,float lo,float hi) { return flmin(hi,flmax(lo,x)); }
 static inline float rndf(float lo,float hi) { return lo + (float)rand()/(float)RAND_MAX*(hi-lo); }
-static inline float velocity_angle_diff(float ref_x, float ref_y, float actual_x, float actual_y) {
-    float ref_mag = sqrtf(ref_x*ref_x + ref_y*ref_y);
-    float actual_mag = sqrtf(actual_x*actual_x + actual_y*actual_y);
-    float denom, cos_theta;
-
-    if (ref_mag <= EPS && actual_mag <= EPS) return 0.0f;
-
-    denom = ref_mag * actual_mag + EPS;
-    cos_theta = flclip((ref_x*actual_x + ref_y*actual_y) / denom, -1.0f, 1.0f);
-    return acosf(cos_theta);
-}
 
 static void spawn_boid(Boids *env, unsigned int i) {
     env->boids[i].x = rndf(LEFT_MARGIN, WIDTH  - RIGHT_MARGIN);
@@ -160,7 +149,6 @@ void c_step(Boids *env) {
     float diff_x, diff_y, dist, current_boid_reward;
     float margin_turn_reward, cohesion_reward, separation_reward, alignment_reward;
     float protected_x_sum, protected_y_sum;
-    float normal_vx, normal_vy, angle_diff;
     float rule_dx, rule_dy, rule_mag;
     unsigned visual_count, protected_count;
     bool manual_control = IsKeyDown(KEY_LEFT_SHIFT);
@@ -197,8 +185,6 @@ void c_step(Boids *env) {
         vis_y_sum = 0.0f;
         protected_x_sum = 0.0f;
         protected_y_sum = 0.0f;
-        normal_vx = 0.0f;
-        normal_vy = 0.0f;
         for (unsigned observed_indx = 0; observed_indx < env->num_agents; observed_indx++) {
             if (current_indx == observed_indx) continue;
             observed_boid = env->boids[observed_indx];
@@ -219,8 +205,6 @@ void c_step(Boids *env) {
         }
         if (protected_count > 0) {
             rule_mag = sqrtf(protected_x_sum*protected_x_sum + protected_y_sum*protected_y_sum) + EPS;
-            normal_vx += (protected_x_sum / rule_mag) * env->separation_factor;
-            normal_vy += (protected_y_sum / rule_mag) * env->separation_factor;
             separation_reward -= rule_mag * env->separation_factor;
         }
         if (visual_count) {
@@ -235,35 +219,20 @@ void c_step(Boids *env) {
             rule_dx = vis_vx_avg - current_boid->velocity.x;
             rule_dy = vis_vy_avg - current_boid->velocity.y;
             rule_mag = sqrtf(rule_dx*rule_dx + rule_dy*rule_dy) + EPS;
-            normal_vx += (rule_dx / rule_mag) * env->alignment_factor;
-            normal_vy += (rule_dy / rule_mag) * env->alignment_factor;
             alignment_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
             alignment_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
 
             rule_dx = vis_x_avg - current_boid->x;
             rule_dy = vis_y_avg - current_boid->y;
             rule_mag = sqrtf(rule_dx*rule_dx + rule_dy*rule_dy) + EPS;
-            normal_vx += (rule_dx / rule_mag) * env->cohesion_factor;
-            normal_vy += (rule_dy / rule_mag) * env->cohesion_factor;
         }
 
-        margin_turn_reward -= env->margin_turn_factor;
-        if (current_boid->y < TOP_MARGIN || current_boid->x < LEFT_MARGIN) {
-            normal_vy += env->margin_turn_factor;
-        } else if (
-            current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN
-            || current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN
-        ) {
-            normal_vy -= env->margin_turn_factor;
-        } else { margin_turn_reward += env->margin_turn_factor; }
-        current_boid_reward = margin_turn_reward + cohesion_reward + separation_reward + alignment_reward;
-
-        float n_mag = sqrtf(normal_vx*normal_vx + normal_vy*normal_vy);
-        if (n_mag > VELOCITY_CAP) {
-            normal_vx = (normal_vx / n_mag) * VELOCITY_CAP;
-            normal_vy = (normal_vy / n_mag) * VELOCITY_CAP;
+        if (current_boid->y < TOP_MARGIN || current_boid->x < LEFT_MARGIN
+            || current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN
+            || current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN) {
+            margin_turn_reward -= env->margin_turn_factor;
         }
-        angle_diff = velocity_angle_diff(normal_vx, normal_vy, current_boid->velocity.x, current_boid->velocity.y);
+        current_boid_reward = margin_turn_reward + cohesion_reward + separation_reward + alignment_reward;
 
         // Normalization
         // env->rewards[current_indx] = current_boid_reward / 5.0f;

From 050e24066a688757e13d092c4aa8fdc34cdc870b Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Thu, 14 May 2026 21:09:50 -0400
Subject: [PATCH 81/82] organizing var zeroing via inlining them

---
 ocean/boids/boids.h | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 0b27dc6a1e..6b3a09c71e 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -173,18 +173,10 @@ void c_step(Boids *env) {
 
         // reward calculation
         current_boid_reward = 0.0f;
-        margin_turn_reward = 0.0f;
-        cohesion_reward = 0.0f;
-        separation_reward = 0.0f;
-        alignment_reward = 0.0f;
-        protected_count = 0;
-        visual_count = 0;
-        vis_vx_sum = 0.0f;
-        vis_vy_sum = 0.0f;
-        vis_x_sum = 0.0f;
-        vis_y_sum = 0.0f;
-        protected_x_sum = 0.0f;
-        protected_y_sum = 0.0f;
+        margin_turn_reward = 0.0f; cohesion_reward = 0.0f; separation_reward = 0.0f; alignment_reward = 0.0f;
+        protected_count = 0; visual_count = 0;
+        vis_vx_sum = 0.0f; vis_vy_sum = 0.0f; vis_x_sum = 0.0f; vis_y_sum = 0.0f;
+        protected_x_sum = 0.0f; protected_y_sum = 0.0f;
         for (unsigned observed_indx = 0; observed_indx < env->num_agents; observed_indx++) {
             if (current_indx == observed_indx) continue;
             observed_boid = env->boids[observed_indx];

From f079fc6722249460d17472c69c4e17a3ef3fb64c Mon Sep 17 00:00:00 2001
From: PLAZMAMA <maorioriori@gmail.com>
Date: Fri, 15 May 2026 00:17:34 -0400
Subject: [PATCH 82/82] remove rule mag for cohesion and alignment and reorder
 their code

---
 ocean/boids/boids.h | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/ocean/boids/boids.h b/ocean/boids/boids.h
index 6b3a09c71e..f5682b48ac 100644
--- a/ocean/boids/boids.h
+++ b/ocean/boids/boids.h
@@ -200,23 +200,15 @@ void c_step(Boids *env) {
             separation_reward -= rule_mag * env->separation_factor;
         }
         if (visual_count) {
-            vis_x_avg  = vis_x_sum  / visual_count;
-            vis_y_avg  = vis_y_sum  / visual_count;
-            vis_vx_avg = vis_vx_sum / visual_count;
-            vis_vy_avg = vis_vy_sum / visual_count;
-
+            vis_x_avg  = vis_x_sum / visual_count;
+            vis_y_avg  = vis_y_sum / visual_count;
             cohesion_reward -= fabsf(vis_x_avg  - current_boid->x) * env->cohesion_factor;
             cohesion_reward -= fabsf(vis_y_avg  - current_boid->y) * env->cohesion_factor;
-
-            rule_dx = vis_vx_avg - current_boid->velocity.x;
-            rule_dy = vis_vy_avg - current_boid->velocity.y;
-            rule_mag = sqrtf(rule_dx*rule_dx + rule_dy*rule_dy) + EPS;
+    
+            vis_vx_avg = vis_vx_sum / visual_count;
+            vis_vy_avg = vis_vy_sum / visual_count;
             alignment_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
             alignment_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
-
-            rule_dx = vis_x_avg - current_boid->x;
-            rule_dy = vis_y_avg - current_boid->y;
-            rule_mag = sqrtf(rule_dx*rule_dx + rule_dy*rule_dy) + EPS;
         }
 
         if (current_boid->y < TOP_MARGIN || current_boid->x < LEFT_MARGIN