Skip to content
This repository was archived by the owner on Apr 30, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/lang/eval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2021,8 +2021,11 @@ static void ray_register_builtins(void) {
register_binary_op("<=", RAY_FN_ATOMIC, ray_lte_fn, OP_LE);
register_binary_op("==", RAY_FN_ATOMIC, ray_eq_fn, OP_EQ);
register_binary_op("!=", RAY_FN_ATOMIC, ray_neq_fn, OP_NE);
register_vary("and", RAY_FN_NONE, ray_and_vary_fn);
register_vary("or", RAY_FN_NONE, ray_or_vary_fn);
/* Special-form so args are passed unevaluated and the kernel can
* short-circuit on the first determining scalar (matches v1 and the
* Lisp/Clojure convention). */
register_vary("and", RAY_FN_SPECIAL_FORM, ray_and_vary_fn);
register_vary("or", RAY_FN_SPECIAL_FORM, ray_or_vary_fn);
register_unary_op("not", RAY_FN_NONE, ray_not_fn, OP_NOT);
register_unary_op("neg", RAY_FN_ATOMIC, ray_neg_fn, OP_NEG);
register_unary("round", RAY_FN_ATOMIC, ray_round_fn);
Expand Down
47 changes: 33 additions & 14 deletions src/ops/arith.c
Original file line number Diff line number Diff line change
Expand Up @@ -331,14 +331,23 @@ ray_t* ray_mod_fn(ray_t* a, ray_t* b) {
/* neg: arithmetic negation of a numeric atom, preserving type.
 * Null input propagates unchanged (retained and returned as-is).
 *
 * INT_MIN is the lone overflow case for signed negation: -INT_MIN does
 * not fit in the same width, and evaluating `-x` there is signed-overflow
 * UB in C. Per k/q convention we surface it as a typed null of the same
 * width — preserving type, avoiding UB, and giving the caller a
 * `nil?`-detectable overflow signal (consistent with `(neg 0Ni) → 0Ni`
 * null propagation).
 *
 * Returns a new reference; on unsupported type returns a type error. */
ray_t* ray_neg_fn(ray_t* x) {
    if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; }
    if (x->type == -RAY_F64) return make_f64(-x->f64);
    if (x->type == -RAY_I64) {
        if (RAY_UNLIKELY(x->i64 == INT64_MIN)) return ray_typed_null(-RAY_I64);
        return make_i64(-x->i64);
    }
    if (x->type == -RAY_I32) {
        if (RAY_UNLIKELY(x->i32 == INT32_MIN)) return ray_typed_null(-RAY_I32);
        return make_i32(-x->i32);
    }
    if (x->type == -RAY_I16) {
        if (RAY_UNLIKELY(x->i16 == INT16_MIN)) return ray_typed_null(-RAY_I16);
        return make_i16(-x->i16);
    }
    return ray_error("type", NULL); /* unsupported atom type */
}

Expand Down Expand Up @@ -366,15 +375,25 @@ ray_t* ray_ceil_fn(ray_t* x) {
return ray_error("type", NULL);
}

/* abs: absolute value of a numeric atom, preserving type.
 * Null input propagates unchanged (retained and returned as-is).
 *
 * INT_MIN has no representable positive counterpart in the same width —
 * return a typed null instead (same convention as `neg`). This stops
 * the nonsensical `(abs INT_MIN) → INT_MIN` (a negative result from
 * abs!) and avoids signed-overflow UB simultaneously.
 *
 * Returns a new reference; on unsupported type returns a type error. */
ray_t* ray_abs_fn(ray_t* x) {
    if (RAY_ATOM_IS_NULL(x)) { ray_retain(x); return x; }
    if (x->type == -RAY_F64) return make_f64(fabs(x->f64));
    if (x->type == -RAY_I64) {
        if (RAY_UNLIKELY(x->i64 == INT64_MIN)) return ray_typed_null(-RAY_I64);
        return make_i64(x->i64 < 0 ? -x->i64 : x->i64);
    }
    if (x->type == -RAY_I32) {
        if (RAY_UNLIKELY(x->i32 == INT32_MIN)) return ray_typed_null(-RAY_I32);
        return make_i32(x->i32 < 0 ? -x->i32 : x->i32);
    }
    if (x->type == -RAY_I16) {
        if (RAY_UNLIKELY(x->i16 == INT16_MIN)) return ray_typed_null(-RAY_I16);
        return make_i16(x->i16 < 0 ? -x->i16 : x->i16);
    }
    return ray_error("type", NULL); /* unsupported atom type */
}

Expand Down
80 changes: 58 additions & 22 deletions src/ops/cmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
*/

#include "lang/internal.h"
#include "ops/ops.h" /* RAY_LAZY, ray_is_lazy, ray_lazy_materialize */

#include <assert.h>

/* Helper: compare char atom vs string atom.
* Returns: -1 if no char/string pair, else memcmp-like result via *out. */
Expand All @@ -45,24 +48,24 @@ int char_str_cmp(ray_t* a, ray_t* b, int *out) {
* the backing STR via ray_sym_str and delegate to ray_str_cmp, which
* uses the 12-byte SSO inline path for short symbols.
*
* If a sym_str lookup fails (NULL — e.g. corrupted intern table or
* uninitialised state) we fall back to comparing the raw interned ids
* rather than declaring the unequal symbols equal. Stable, never
* silently collapses distinct symbols. */
* Invariant: any valid SYM atom resolves to its interned string. A
* NULL from ray_sym_str means corruption (uninitialised intern table,
* out-of-range id, or evicted slot) — no defensible total order exists
* in that state. We assert and let the process abort rather than
* fabricate an answer (returning 0 silently collapses distinct symbols;
* returning ±1 by raw id invents a non-lexicographic ordering that
* still lies about the contract). Matches v1 behaviour, which also
* trusts the invariant (and would SIGSEGV via strcmp(NULL,...) if it
* broke). */
/* Compare two SYM atoms lexicographically via their interned strings.
 * Equal interned ids short-circuit to 0 with no lookup or allocation.
 *
 * Invariant: any valid SYM atom resolves to its interned string. A NULL
 * from ray_sym_str means corruption (uninitialised intern table,
 * out-of-range id, or evicted slot) — no defensible total order exists
 * in that state, so we assert rather than fabricate an answer
 * (returning 0 silently collapses distinct symbols; ordering by raw id
 * invents a non-lexicographic order that still lies about the
 * contract).
 *
 * NOTE(review): under NDEBUG the assert compiles out and ray_str_cmp
 * would then receive NULL — confirm release builds keep asserts
 * enabled, or add an explicit abort path if not. */
int sym_atom_cmp(ray_t* a, ray_t* b) {
    if (a->i64 == b->i64) return 0;
    ray_t* sa = ray_sym_str(a->i64);
    ray_t* sb = ray_sym_str(b->i64);
    assert(sa && sb && "sym_atom_cmp: corrupted intern table — "
           "valid SYM atom must resolve to interned string");
    int r = ray_str_cmp(sa, sb);
    ray_release(sa);
    ray_release(sb);
    return r;
}

Expand Down Expand Up @@ -249,30 +252,63 @@ ray_t* ray_or_fn(ray_t* a, ray_t* b) {
return make_bool((is_truthy(a) || is_truthy(b)) ? 1 : 0);
}

/* Variadic left-fold over the binary kernels. (and a b c) folds as
* (and (and a b) c) — same shape Lisp/Clojure use. */
/* Special-form variadic AND/OR with short-circuit (matches v1).
*
* `args` are UNEVALUATED AST nodes — registered with RAY_FN_SPECIAL_FORM
* so the evaluator hands us raw forms rather than computed values. We
* call ray_eval per arg ourselves and stop as soon as the result is
* determined: AND on first scalar falsy, OR on first scalar truthy.
*
* Mixed scalar+vector: when the running accumulator becomes a *scalar*
* with the determining truth value, we return it immediately — same
* shape as Lisp/Clojure where short-circuit yields the determinant.
* If the accumulator is a vector we cannot short-circuit (subsequent
* args may be vectors that still need element-wise combination), so we
* fall through to ray_and_fn / ray_or_fn for that step. */
/* Evaluate one unevaluated argument form for the AND/OR special forms.
 * NULL and error results pass straight through; lazy values are forced
 * so subsequent truthiness/combination steps see concrete data. */
static ray_t* eval_and_short(ray_t* arg) {
    ray_t* evaluated = ray_eval(arg);
    if (evaluated == NULL || RAY_IS_ERR(evaluated)) return evaluated;
    return ray_is_lazy(evaluated) ? ray_lazy_materialize(evaluated) : evaluated;
}

/* Special-form variadic AND with short-circuit.
 *
 * `args` are UNEVALUATED AST nodes (registered RAY_FN_SPECIAL_FORM):
 * each is evaluated here in turn, and evaluation stops as soon as the
 * running result is a *scalar* falsy — that determinant itself is
 * returned, Lisp/Clojure style. A vector accumulator cannot
 * short-circuit: later args may still need element-wise combination
 * through ray_and_fn (e.g. `(and vec false)` broadcasts to an
 * all-false vector of vec's shape, not a bare scalar).
 *
 * NULL/error results from argument evaluation propagate immediately,
 * releasing the accumulator. Requires at least 2 args. */
ray_t* ray_and_vary_fn(ray_t** args, int64_t n) {
    if (n < 2) return ray_error("arity", "expected at least 2 args, got %lld", (long long)n);
    ray_t* acc = eval_and_short(args[0]);
    if (!acc || RAY_IS_ERR(acc)) return acc;
    /* Short-circuit before evaluating any further argument. */
    if (ray_is_atom(acc) && !is_truthy(acc)) return acc;
    for (int64_t i = 1; i < n; i++) {
        ray_t* v = eval_and_short(args[i]);
        if (!v || RAY_IS_ERR(v)) { ray_release(acc); return v; }
        ray_t* next = ray_and_fn(acc, v);
        ray_release(acc);
        ray_release(v);
        if (!next || RAY_IS_ERR(next)) return next;
        acc = next;
        if (ray_is_atom(acc) && !is_truthy(acc)) return acc;
    }
    return acc;
}

/* Special-form variadic OR with short-circuit — mirror of
 * ray_and_vary_fn: args arrive unevaluated, each is evaluated in turn,
 * and a *scalar truthy* accumulator is returned immediately as the
 * determinant. Vector accumulators fall through to element-wise
 * ray_or_fn combination. NULL/error results propagate, releasing the
 * accumulator. Requires at least 2 args. */
ray_t* ray_or_vary_fn(ray_t** args, int64_t n) {
    if (n < 2) return ray_error("arity", "expected at least 2 args, got %lld", (long long)n);
    ray_t* acc = eval_and_short(args[0]);
    if (!acc || RAY_IS_ERR(acc)) return acc;
    /* Short-circuit only on scalar truthy accumulator (see AND comment). */
    if (ray_is_atom(acc) && is_truthy(acc)) return acc;
    for (int64_t i = 1; i < n; i++) {
        ray_t* v = eval_and_short(args[i]);
        if (!v || RAY_IS_ERR(v)) { ray_release(acc); return v; }
        ray_t* next = ray_or_fn(acc, v);
        ray_release(acc);
        ray_release(v);
        if (!next || RAY_IS_ERR(next)) return next;
        acc = next;
        if (ray_is_atom(acc) && is_truthy(acc)) return acc;
    }
    return acc;
}
Expand Down
40 changes: 40 additions & 0 deletions src/ops/expr.c
Original file line number Diff line number Diff line change
Expand Up @@ -926,6 +926,35 @@ static void expr_full_fn(void* ctx, uint32_t worker_id, int64_t start, int64_t e
scratch_free(scratch_hdr);
}

/* Post-pass for the fused unary i64 path: the element-wise loop negates
 * via unsigned wrap, so an overflowed -INT64_MIN / |INT64_MIN| lands
 * back as INT64_MIN in the output buffer. Rewrite each such slot as a
 * typed null: store 0 in the data (preserving the "null position holds
 * 0" invariant) and raise the null bit. Caller must invoke this
 * single-threaded — after pool dispatch joins. */
static void mark_i64_overflow_as_null(ray_t* result, int64_t off, int64_t len) {
    int64_t* slot = (int64_t*)ray_data(result) + off;
    for (int64_t pos = 0; pos < len; pos++) {
        if (slot[pos] != INT64_MIN) continue; /* common case: no overflow */
        slot[pos] = 0;
        ray_vec_set_null(result, off + pos, true);
    }
}

/* The fused unary path can produce INT64_MIN via overflow-wrap only
 * when the final instruction is a unary OP_NEG or OP_ABS whose source
 * and destination registers are both i64 (and the expression's output
 * type is i64). Inspect the last compiled instruction to decide whether
 * the overflow-to-null post-pass is needed. */
static bool expr_last_op_overflows_i64(const ray_expr_t* expr) {
    if (expr->out_type != RAY_I64 || expr->n_ins == 0) return false;
    const expr_ins_t* tail = &expr->ins[expr->n_ins - 1];
    bool neg_or_abs = (tail->opcode == OP_NEG || tail->opcode == OP_ABS);
    bool unary_form = (tail->src2 == 0xFF); /* no second source register */
    return neg_or_abs && unary_form
        && expr->regs[tail->src1].type == RAY_I64
        && expr->regs[tail->dst].type == RAY_I64;
}

/* Evaluate compiled expression over parted (segmented) columns.
* Iterates segments as outer loop, rebinds data pointers per segment,
* then dispatches the existing morsel evaluator per segment. Zero copy. */
Expand Down Expand Up @@ -991,6 +1020,8 @@ static ray_t* expr_eval_full_parted(const ray_expr_t* expr, int64_t nrows) {

global_off += seg_len;
}
if (expr_last_op_overflows_i64(expr))
mark_i64_overflow_as_null(out, 0, nrows);
return out;
}

Expand All @@ -1014,6 +1045,8 @@ ray_t* expr_eval_full(const ray_expr_t* expr, int64_t nrows) {
else
expr_full_fn(&ctx, 0, 0, nrows);

if (expr_last_op_overflows_i64(expr))
mark_i64_overflow_as_null(out, 0, nrows);
return out;
}

Expand Down Expand Up @@ -1272,6 +1305,13 @@ ray_t* exec_elementwise_unary(ray_graph_t* g, ray_op_t* op, ray_t* input) {
}
}

/* OP_NEG/OP_ABS over i64: |INT64_MIN| and -INT64_MIN don't fit — surface
* as typed null (k/q convention). Loop above used unsigned wrap, so
* overflow positions land as INT64_MIN in data; convert them to null. */
if (out_type == RAY_I64 && in_type == RAY_I64 &&
(op->opcode == OP_NEG || op->opcode == OP_ABS))
mark_i64_overflow_as_null(result, 0, len);

return result;
}

Expand Down
8 changes: 8 additions & 0 deletions src/ops/glob.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@
* [a-z] — range
* [!abc] — negated class
*
* Matching a literal metacharacter — there is no backslash escape; wrap
* the character in a one-element class instead:
* [*] matches a literal '*'
* [?] matches a literal '?'
* [[] matches a literal '['
* []] matches a literal ']' (']' as first char inside [...] is literal)
* [-] matches a literal '-' (as the sole char, no range to form)
*
* `glob_match` is case-sensitive. `glob_match_ci` lowercases ASCII letters
* on both sides before comparing (so it matches 'A' against 'a', 'A-Z'
* range matches both case forms, etc.).
Expand Down
49 changes: 49 additions & 0 deletions src/ops/query.c
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,55 @@ static ray_op_t* compile_expr_dag(ray_graph_t* g, ray_t* expr) {
return &g->nodes[chain_id];
}

/* Variadic `and`/`or`: fold into a balanced binary tree.
* `(and a b c d)` → `(and (and a b) (and c d))` — depth log2(N).
* Without this, n>=4 falls through `compile_expr_dag` and the
* caller (e.g. select WHERE) reports "WHERE predicate not
* supported by DAG compiler". The fused-expr executor evaluates
* the resulting tree as a sequence of binary AND/OR instructions
* sharing scratch registers — no extra column allocations vs
* what hand-nested binary forms already do.
*
* Balanced tree (rather than left-fold) keeps the canonical
* shape symmetric and minimises dependency-chain depth, which
* future OoO / parallel-instruction executors can exploit. */
if (n >= 4) {
bool is_and = (fname_len == 3 && memcmp(fname, "and", 3) == 0);
bool is_or = (fname_len == 2 && memcmp(fname, "or", 2) == 0);
if (is_and || is_or) {
int64_t k = n - 1;
if (k > 64) return NULL; /* depth/space guard */
uint32_t arg_ids[64];
for (int64_t i = 0; i < k; i++) {
ray_op_t* a = compile_expr_dag(g, elems[i + 1]);
if (!a) return NULL;
arg_ids[i] = a->id;
}
dag_binary_ctor ctor = is_and ? ray_and : ray_or;
/* Iterative pairwise reduction: at each round, fold
* adjacent pairs into a single node, halving the count.
* Equivalent to recursive bisect but avoids a helper. */
int64_t cnt = k;
while (cnt > 1) {
int64_t out = 0;
for (int64_t i = 0; i + 1 < cnt; i += 2) {
/* make_binary re-resolves both inputs via stored
* IDs after its own potential realloc, so the
* pointers we pass here are safe to use. */
ray_op_t* l = &g->nodes[arg_ids[i]];
ray_op_t* r = &g->nodes[arg_ids[i + 1]];
ray_op_t* combined = ctor(g, l, r);
if (!combined) return NULL;
arg_ids[out++] = combined->id;
}
if (cnt & 1) /* carry odd tail */
arg_ids[out++] = arg_ids[cnt - 1];
cnt = out;
}
return &g->nodes[arg_ids[0]];
}
}

/* Binary op? */
if (n == 3) {
dag_binary_ctor ctor = resolve_binary_dag(fn_sym);
Expand Down
52 changes: 42 additions & 10 deletions test/rfl/arith/abs.rfl
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,45 @@
(type (abs [-1h 2h])) -- 'I16
(type (abs [-1i 2i])) -- 'I32

;; INT_MIN edge: same overflow-wrap convention as neg — abs of INT_MIN
;; returns INT_MIN (no UB). Verified under UBSan.
;; Literal -32768h / -2147483648i can't be parsed (parser tokenises
;; positive then negates), so verify via i64 round-trip.
(set MIN16 (as 'i16 (as 'i64 -32768)))
(as 'i64 (abs MIN16)) -- -32768
(type (abs MIN16)) -- 'i16
(set MIN32 (as 'i32 (as 'i64 -2147483648)))
(as 'i64 (abs MIN32)) -- -2147483648
(type (abs MIN32)) -- 'i32
;; INT_MIN edge: |INT_MIN| doesn't fit in the same width — abs
;; returns a typed null of the same width (k/q convention). Stops
;; the broken `(abs -32768h) → -32768h` (negative result from abs!)
;; behaviour and avoids signed-overflow UB.

;; (- (neg 32767h) 1h) = -32768h = INT16_MIN; (abs INT16_MIN) → 0Nh
(nil? (abs (- (neg 32767h) 1h))) -- true
(type (abs (- (neg 32767h) 1h))) -- 'i16

(nil? (abs (- (neg 2147483647i) 1i))) -- true
(type (abs (- (neg 2147483647i) 1i))) -- 'i32

(nil? (abs (- (neg 9223372036854775807) 1))) -- true
(type (abs (- (neg 9223372036854775807) 1))) -- 'i64

;; values adjacent to INT_MIN that fit — abs works normally
(abs -32767h) -- 32767h
(abs -2147483647i) -- 2147483647i
(abs -9223372036854775807) -- 9223372036854775807

;; ──────────────────────────────────────────────────────────────────
;; DAG (fused expression) path — `(select {x: (abs col) from: t})`
;; over a column containing INT64_MIN must surface that row as typed
;; null, not as a negative value (the broken "abs returns INT_MIN"
;; case Anton flagged on PR #8).
;;
;; Vec literal `[...]` only accepts atom literals, so we build the
;; INT64_MIN-bearing column via `concat` of a typed atom.
;; ──────────────────────────────────────────────────────────────────

(set Va (concat -9223372036854775808 (concat -5 (concat 5 0))))
(set Ta (table [v] (list Va)))
(set Ra (select {x: (abs v) from: Ta}))
(nil? (at (at Ra 'x) 0)) -- true ;; INT64_MIN row → null
(at (at Ra 'x) 1) -- 5
(at (at Ra 'x) 2) -- 5
(at (at Ra 'x) 3) -- 0

;; vector eval-path (no fused DAG) — goes through ray_abs_fn per element.
(nil? (at (abs Va) 0)) -- true
(at (abs Va) 1) -- 5
(at (abs Va) 2) -- 5
Loading
Loading