diff --git a/.claude/rules.md b/.claude/rules.md index e478e7e7..13b373bb 100644 --- a/.claude/rules.md +++ b/.claude/rules.md @@ -57,6 +57,7 @@ TypeScript-to-native compiler using LLVM IR. Compiles .ts/.js files to native bi | Dir | Purpose | | ----------------------------------------- | --------------------------------------------------------------------------------- | +| `src/semantic/` | Semantic analysis passes run before codegen (closure mutation, union types) | | `src/codegen/` | LLVM IR code generation (the core) | | `src/codegen/expressions/method-calls.ts` | Central dispatcher for all `object.method()` calls | | `src/codegen/types/collections/string/` | String method IR generators (manipulation.ts, search.ts, split.ts, etc.) | @@ -205,6 +206,7 @@ Existing bridges: `regex-bridge.c`, `yyjson-bridge.c`, `os-bridge.c`, `child-pro 5. **Type cast field order must match FULL struct layout** — when the type extends a parent interface, the struct includes ALL parent fields. `as { name, closureInfo }` on a `LiftedFunction extends FunctionNode` (10 fields) reads index 1 instead of index 9. Include every field. 6. **`ret void` not `unreachable`** at end of void functions 7. **Class structs: boolean is `i1`; Interface structs: boolean is `double`** +8. **Set feature flags when emitting gated extern calls** — runtime declarations for C bridges (yyjson, curl, etc.) are conditionally emitted behind flags like `usesJson`, `usesCurl`. Any code path that emits `call @csyyjson_*` must call `ctx.setUsesJson(true)`, etc. Missing this causes "undefined value" errors from `opt` because the `declare` is never emitted. ## Interface Field Iteration @@ -257,3 +259,33 @@ Parser preserves string values in `EnumMember.stringValue` and marks `EnumDeclar ## Async/Await Type Tracking `allocateAwaitResult` in `variable-allocator.ts` must inspect the awaited expression to determine the correct SymbolKind. Default is `i8*`/string, but `Promise.all()` resolves to `%ObjectArray*`. 
For each new async API that resolves to a specific type, add a detection case to `allocateAwaitResult`. + +## Semantic Analysis Passes + +Semantic passes live in `src/semantic/` and run before codegen (called from `LLVMGenerator.generateParts()`). +They catch errors that would produce silently wrong native code — the native compiler can't throw exceptions +at runtime, so these must be compile-time errors. + +Current passes: + +- **`closure-mutation-checker.ts`** — ChadScript closures capture by value. Mutating a variable after capture + produces silently wrong results. This pass detects post-capture assignments and emits a compile error. +- **`union-type-checker.ts`** — Type alias unions like `type Mixed = string | number` bypass the inline union + check. This pass resolves aliases and rejects unions whose members map to different LLVM representations. + +To add a new semantic pass: create `src/semantic/your-check.ts`, export a `checkX(ast: AST): void` function, +and call it from `generateParts()` in `llvm-generator.ts`. + +## LLVMGenerator.reset() + +`LLVMGenerator.reset()` calls `super.reset()` to reset all `BaseGenerator` fields, then clears its own +additional fields. If you add new per-function state to either class, add the reset in the right place: +base fields in `BaseGenerator.reset()`, LLVMGenerator-only fields in the override after `super.reset()`. + +## Expression Orchestrator — No Silent Nulls + +`orchestrator.ts` must **never** silently generate null pointers (`inttoptr i64 0 to i8*`) for unrecognized +expressions. These nulls are UB that LLVM `-O2` can exploit to prune unrelated code paths. Both fallback +paths (empty type, unsupported type) now call `ctx.emitError()` which is `never`-typed — it exits the +compiler immediately. If a new expression type is added to the parser, add a handler in the orchestrator; +don't rely on a fallback. 
diff --git a/docs/language/features.md b/docs/language/features.md index ad268cd8..602f9e62 100644 --- a/docs/language/features.md +++ b/docs/language/features.md @@ -36,7 +36,7 @@ | `async`/`await` | Supported | | Default parameters | Supported | | Rest parameters (`...args`) | Supported | -| Closures | Supported (capture by value, not by reference) | +| Closures | Supported (capture by value; post-capture mutation is a compile error) | | `declare function` (FFI) | Supported (see [FFI](#foreign-function-interface-ffi)) | | Async generators / `for await...of` | Not supported | @@ -183,15 +183,16 @@ Strings are null-terminated C strings, not JavaScript's UTF-16 strings. They wor ## Closures -Arrow functions and nested functions can capture outer variables, but captures are **by value, not by reference**. If you mutate a variable after a closure captures it, the closure won't see the change: +Arrow functions and nested functions can capture outer variables, but captures are **by value, not by reference**. Mutating a variable after a closure captures it is a **compile error**: ```typescript let x = 1; const f = () => console.log(x); -x = 2; -f(); // prints 1, not 2 +x = 2; // error: variable 'x' is captured by a closure but reassigned after capture ``` +This is enforced at compile time because the closure would silently see the old value — a common source of bugs in native code where there's no runtime to help. 
+ Inline lambdas with captures work in array methods: ```typescript diff --git a/src/codegen/expressions/access/index.ts b/src/codegen/expressions/access/index.ts index 0c01d6b3..9d8d8e3f 100644 --- a/src/codegen/expressions/access/index.ts +++ b/src/codegen/expressions/access/index.ts @@ -35,6 +35,7 @@ export interface IndexAccessGeneratorContext { isStringExpression(expr: Expression): boolean; readonly stringGen: IStringGenerator; ensureDouble(value: string): string; + setUsesJson(value: boolean): void; } /** @@ -92,6 +93,7 @@ export class IndexAccessGenerator { exprObjBase.type === "variable" && this.ctx.symbolTable.isJSON((expr.object as VariableNode).name) ) { + this.ctx.setUsesJson(true); return this.generateJSONArrayIndex(expr, params); } diff --git a/src/codegen/expressions/orchestrator.ts b/src/codegen/expressions/orchestrator.ts index 16f7758b..c9579d97 100644 --- a/src/codegen/expressions/orchestrator.ts +++ b/src/codegen/expressions/orchestrator.ts @@ -47,6 +47,7 @@ interface ExpressionOrchestratorContext { setLastInlineLambdaEnvPtr(ptr: string | null): void; setLastTypeAssertionSourceVar(name: string | null): void; emitWarning(message: string, loc?: { line: number; column: number }, suggestion?: string): void; + emitError(message: string, loc?: { line: number; column: number }, suggestion?: string): never; } /** @@ -119,16 +120,13 @@ export class ExpressionGenerator { generate(expr: Expression, params: string[]): string { const exprTyped = expr as { type: string }; if (!exprTyped.type || exprTyped.type.length === 0) { - // Strict: expressions must have a type. An empty type indicates a parser - // or AST construction bug — surface it instead of silently returning null. - this.ctx.emitWarning( - "expression has empty type — this likely indicates a parser bug, treating as null", + // Hard error: expressions must have a type. An empty type indicates a parser + // or AST construction bug. 
Previously this silently generated a null pointer, + // which LLVM -O2 could exploit as UB to prune unrelated code paths. + this.ctx.emitError( + "expression has empty type — this likely indicates a parser bug", (expr as { loc?: { line: number; column: number } }).loc, ); - const temp = this.ctx.nextTemp(); - this.ctx.emit(`${temp} = inttoptr i64 0 to i8*`); - this.ctx.setVariableType(temp, "i8*"); - return temp; } // Literals if (exprTyped.type === "number") { @@ -323,15 +321,12 @@ export class ExpressionGenerator { return this.indexAccessGen.generateAssignment(expr as IndexAccessAssignmentNode, params); } - this.ctx.emitWarning( + // Hard error: unsupported expression types must not silently produce null pointers. + // A null here would be UB that LLVM -O2 can exploit to prune unrelated code. + this.ctx.emitError( "unsupported expression type: " + exprTyped.type, (expr as { loc?: { line: number; column: number } }).loc, - "this expression will evaluate to null", ); - const temp = this.ctx.nextTemp(); - this.ctx.emit(`${temp} = inttoptr i64 0 to i8*`); - this.ctx.setVariableType(temp, "i8*"); - return temp; } /** diff --git a/src/codegen/llvm-generator.ts b/src/codegen/llvm-generator.ts index 1a0da1eb..f9643c83 100644 --- a/src/codegen/llvm-generator.ts +++ b/src/codegen/llvm-generator.ts @@ -118,6 +118,8 @@ import type { TypeChecker } from "../typescript/type-checker.js"; import { InterfaceStructGenerator } from "./types/interface-struct-generator.js"; import { JsonObjectMeta } from "./expressions/access/member.js"; import type { TargetInfo } from "../target-types.js"; +import { checkClosureMutations } from "../semantic/closure-mutation-checker.js"; +import { checkUnionTypes } from "../semantic/union-type-checker.js"; export interface SemaSymbolData { names: string[]; @@ -1717,18 +1719,8 @@ export class LLVMGenerator extends BaseGenerator implements IGeneratorContext { } reset(): void { - this.tempCounter = 0; - this.labelCounter = 0; - this.currentLabel = 
"entry"; - this.output.length = 0; - this.outputIsTerminator.length = 0; - this.outputCount = 0; - this.thisPointer = null; - this.currentClassName = null; - this.currentFunctionReturnType = "double"; - this.symbolTable.clearLocals(); - this.variableTypes.clear(); - this.expressionTypes.clear(); + super.reset(); + // LLVMGenerator-specific fields not in BaseGenerator this.stringBuilderSlen.clear(); this.stringBuilderScap.clear(); } @@ -2357,6 +2349,9 @@ export class LLVMGenerator extends BaseGenerator implements IGeneratorContext { } generateParts(): string[] { + checkClosureMutations(this.ast); + checkUnionTypes(this.ast); + const irParts: string[] = []; const interfaceStructDefs = this.interfaceStructGen.generateStructTypeDefinitions(); diff --git a/src/semantic/closure-mutation-checker.ts b/src/semantic/closure-mutation-checker.ts new file mode 100644 index 00000000..e8232bf7 --- /dev/null +++ b/src/semantic/closure-mutation-checker.ts @@ -0,0 +1,372 @@ +// Closure mutation checker — semantic pass run before IR generation. +// ChadScript closures capture by value, so mutations to a variable after it has been +// captured produce silently incorrect results. This pass detects such mutations and +// turns them into a compile error with a clear message. 
+
+import { ClosureAnalyzer } from "../codegen/infrastructure/closure-analyzer.js";
+import type {
+  AST,
+  Statement,
+  Expression,
+  BlockStatement,
+  VariableDeclaration,
+  AssignmentStatement,
+  IfStatement,
+  WhileStatement,
+  DoWhileStatement,
+  ForStatement,
+  ForOfStatement,
+  TryStatement,
+  SwitchStatement,
+  ReturnStatement,
+  ThrowStatement,
+  ArrowFunctionNode,
+  ObjectProperty,
+  MapEntry,
+  SourceLocation,
+} from "../ast/types.js";
+// Entry point of the pass: checks top-level code, every function body and every class method.
+export function checkClosureMutations(ast: AST): void {
+  const checker = new ClosureMutationChecker();
+  checker.checkAST(ast); // does not return on a violation: reportError() calls process.exit(1)
+}
+// Source-order walker. scopeVarNames = names visible so far; capturedNames = names captured so far.
+class ClosureMutationChecker {
+  private analyzer: ClosureAnalyzer;
+
+  constructor() {
+    this.analyzer = new ClosureAnalyzer();
+  }
+  // Runs the check over the whole program; each walk below starts with an empty captured list.
+  checkAST(ast: AST): void {
+    // Walk all top-level items in source order.
+    // topLevelItems is the combined ordered list of declarations + expressions.
+    const items = ast.topLevelItems;
+    if (items && items.length > 0) {
+      this.walkStatements(items as Statement[], [], []);
+    }
+
+    // Walk each standalone function body (fresh scope per function).
+    for (let i = 0; i < ast.functions.length; i++) {
+      const fn = ast.functions[i];
+      // Function params are in scope for the entire body.
+      this.walkBlock(fn.body, fn.params.slice(), []);
+    }
+
+    // Walk each class method body (fresh scope per method).
+    for (let i = 0; i < ast.classes.length; i++) {
+      const cls = ast.classes[i];
+      for (let j = 0; j < cls.methods.length; j++) {
+        const method = cls.methods[j];
+        this.walkBlock(method.body, method.params.slice(), []);
+      }
+    }
+  }
+  // Walks stmts in order. Both arrays are mutated in place, so later statements see earlier effects.
+  private walkStatements(
+    stmts: Statement[],
+    scopeVarNames: string[],
+    capturedNames: string[],
+  ): void {
+    for (let i = 0; i < stmts.length; i++) {
+      this.walkStatement(stmts[i], scopeVarNames, capturedNames);
+    }
+  }
+  // Walks a block's statements; callers pass a copy of the scope array to open a nested scope.
+  private walkBlock(block: BlockStatement, scopeVarNames: string[], capturedNames: string[]): void {
+    this.walkStatements(block.statements, scopeVarNames, capturedNames);
+  }
+  // Dispatches on stmt.type: declarations extend scopeVarNames, assignments to captured names error.
+  private walkStatement(stmt: Statement, scopeVarNames: string[], capturedNames: string[]): void {
+    const s = stmt as { type: string };
+    const stype = s.type;
+
+    if (stype === "variable_declaration") {
+      const decl = stmt as VariableDeclaration;
+      // Scan the initializer for arrow functions before adding the var to scope
+      // (the variable is not in scope inside its own initializer).
+      if (decl.value !== null && decl.value !== undefined) {
+        this.scanExprForCaptures(decl.value as Expression, scopeVarNames, capturedNames);
+      }
+      scopeVarNames.push(decl.name);
+    } else if (stype === "assignment") {
+      const assign = stmt as AssignmentStatement;
+      // Simple-name reassignment after capture is the error we're looking for.
+      // Member-access assignments (obj.x = y) don't reassign the binding itself.
+      if (capturedNames.indexOf(assign.name) !== -1) {
+        this.reportError(assign.name, assign.loc);
+      }
+      this.scanExprForCaptures(assign.value, scopeVarNames, capturedNames);
+    } else if (stype === "if") {
+      const ifStmt = stmt as IfStatement;
+      this.scanExprForCaptures(ifStmt.condition, scopeVarNames, capturedNames);
+      // Pass a scope copy into each branch so declarations there don't escape.
+      // capturedNames is shared: captures inside a branch still protect against
+      // mutations that appear later in the outer scope. NOTE(review): names only — shadowing is not modeled.
+      this.walkBlock(ifStmt.thenBlock, scopeVarNames.slice(), capturedNames);
+      if (ifStmt.elseBlock !== null && ifStmt.elseBlock !== undefined) {
+        this.walkBlock(ifStmt.elseBlock, scopeVarNames.slice(), capturedNames);
+      }
+    } else if (stype === "while") {
+      const whileStmt = stmt as WhileStatement;
+      this.scanExprForCaptures(whileStmt.condition, scopeVarNames, capturedNames);
+      // NOTE(review): single pass — an assignment placed before a capture in the loop body is not flagged.
+      this.walkBlock(whileStmt.body, scopeVarNames.slice(), capturedNames);
+    } else if (stype === "do_while") {
+      const doWhileStmt = stmt as DoWhileStatement;
+      this.walkBlock(doWhileStmt.body, scopeVarNames.slice(), capturedNames);
+      this.scanExprForCaptures(doWhileStmt.condition, scopeVarNames, capturedNames);
+    } else if (stype === "for") {
+      const forStmt = stmt as ForStatement;
+      // init can declare a new loop variable; give it a fresh scope copy.
+      const forScope = scopeVarNames.slice();
+      if (forStmt.init !== null && forStmt.init !== undefined) {
+        this.walkStatement(forStmt.init as Statement, forScope, capturedNames);
+      }
+      if (forStmt.condition !== null && forStmt.condition !== undefined) {
+        this.scanExprForCaptures(forStmt.condition, forScope, capturedNames);
+      }
+      // The body is walked before `update`, so captures made in the body apply to the update check.
+      this.walkBlock(forStmt.body, forScope.slice(), capturedNames);
+      if (forStmt.update !== null && forStmt.update !== undefined) {
+        const upd = forStmt.update as { type: string };
+        if (upd.type === "assignment") {
+          this.walkStatement(forStmt.update as Statement, forScope, capturedNames);
+        } else {
+          this.scanExprForCaptures(forStmt.update as Expression, forScope, capturedNames);
+        }
+      }
+    } else if (stype === "for_of") {
+      const forOfStmt = stmt as ForOfStatement;
+      this.scanExprForCaptures(forOfStmt.iterable, scopeVarNames, capturedNames);
+      const forOfScope = scopeVarNames.slice();
+      forOfScope.push(forOfStmt.variableName);
+      if (forOfStmt.destructuredNames) {
+        // Explicit cast to string[] avoids the union type (string[] | undefined)
+        // that would confuse the native compiler's array index codegen.
+        const dnames = forOfStmt.destructuredNames as string[];
+        for (let dn = 0; dn < dnames.length; dn++) {
+          forOfScope.push(dnames[dn]);
+        }
+      }
+      this.walkBlock(forOfStmt.body, forOfScope, capturedNames);
+    } else if (stype === "try") {
+      const tryStmt = stmt as TryStatement;
+      this.walkBlock(tryStmt.tryBlock, scopeVarNames.slice(), capturedNames);
+      if (tryStmt.catchBody !== null && tryStmt.catchBody !== undefined) {
+        const catchScope = scopeVarNames.slice();
+        if (tryStmt.catchParam !== null && tryStmt.catchParam !== undefined) {
+          catchScope.push(tryStmt.catchParam as string);
+        }
+        this.walkBlock(tryStmt.catchBody, catchScope, capturedNames);
+      }
+      if (tryStmt.finallyBlock !== null && tryStmt.finallyBlock !== undefined) {
+        this.walkBlock(tryStmt.finallyBlock, scopeVarNames.slice(), capturedNames);
+      }
+    } else if (stype === "switch") {
+      const switchStmt = stmt as SwitchStatement;
+      this.scanExprForCaptures(switchStmt.discriminant, scopeVarNames, capturedNames);
+      for (let ci = 0; ci < switchStmt.cases.length; ci++) {
+        const c = switchStmt.cases[ci];
+        if (c.test !== null && c.test !== undefined) {
+          this.scanExprForCaptures(c.test as Expression, scopeVarNames, capturedNames);
+        }
+        this.walkStatements(c.consequent, scopeVarNames.slice(), capturedNames);
+      }
+    } else if (stype === "return") {
+      const retStmt = stmt as ReturnStatement;
+      if (retStmt.value !== null && retStmt.value !== undefined) {
+        this.scanExprForCaptures(retStmt.value as Expression, scopeVarNames, capturedNames);
+      }
+    } else if (stype === "throw") {
+      const throwStmt = stmt as ThrowStatement;
+      this.scanExprForCaptures(throwStmt.argument, scopeVarNames, capturedNames);
+    } else if (stype === "block") {
+      this.walkBlock(stmt as BlockStatement, scopeVarNames.slice(), capturedNames);
+    } else if (stype !== "break" && stype !== "continue") {
+      // Expressions used as statements (call, method_call, new, await, etc.)
+      this.scanExprForCaptures(stmt as Expression, scopeVarNames, capturedNames);
+    }
+  }
+
+  // Walk an expression searching for arrow function literals. When one is found:
+  // 1. Use ClosureAnalyzer to identify which outer-scope variables it captures.
+  // 2. Add those names to capturedNames so subsequent mutations are caught.
+  // 3. Recurse into the arrow body with a fresh scope (function boundary).
+  private scanExprForCaptures(
+    expr: Expression,
+    scopeVarNames: string[],
+    capturedNames: string[],
+  ): void {
+    const e = expr as { type: string };
+    const etype = e.type;
+
+    if (etype === "arrow_function") {
+      const arrow = expr as ArrowFunctionNode;
+      // Build a dummy-type parallel array — ClosureAnalyzer only uses names for
+      // free-variable detection; llvmType in the result is unused here.
+      const dummyTypes: string[] = [];
+      for (let i = 0; i < scopeVarNames.length; i++) {
+        dummyTypes.push("double");
+      }
+      const info = this.analyzer.analyze(
+        arrow.params,
+        arrow.body,
+        scopeVarNames,
+        dummyTypes,
+        "check",
+      );
+      for (let i = 0; i < info.captures.length; i++) {
+        // Explicit cast required — ObjectArray elements are i8* in native code; without
+        // the cast the native compiler can't GEP to the correct field offset for .name.
+        const cap = info.captures[i] as { name: string; llvmType: string };
+        const capName = cap.name;
+        if (capturedNames.indexOf(capName) === -1) {
+          capturedNames.push(capName);
+        }
+      }
+      // Recurse into the arrow body as a new function scope (block bodies only; fresh capturedNames).
+      const arrowBodyTyped = arrow.body as { type: string };
+      if (arrowBodyTyped.type === "block") {
+        this.walkBlock(arrow.body as BlockStatement, arrow.params.slice(), []);
+      }
+    } else if (etype === "binary") {
+      // BinaryNode: { type, op, left, right } — must include op to get correct GEP index for left/right
+      const binExpr = expr as { type: string; op: string; left: Expression; right: Expression };
+      this.scanExprForCaptures(binExpr.left, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(binExpr.right, scopeVarNames, capturedNames);
+    } else if (etype === "unary") {
+      // UnaryNode: { type, op, operand } — must include op to get correct GEP index for operand
+      const unaryExpr = expr as { type: string; op: string; operand: Expression };
+      this.scanExprForCaptures(unaryExpr.operand, scopeVarNames, capturedNames);
+    } else if (etype === "call") {
+      // CallNode: { type, name, args } — must include name to get correct GEP index for args
+      const callExpr = expr as { type: string; name: string; args: Expression[] };
+      for (let i = 0; i < callExpr.args.length; i++) {
+        this.scanExprForCaptures(callExpr.args[i], scopeVarNames, capturedNames);
+      }
+    } else if (etype === "method_call") {
+      // MethodCallNode: { type, object, method, args } — must include method to get correct GEP index for args
+      const mcExpr = expr as {
+        type: string;
+        object: Expression;
+        method: string;
+        args: Expression[];
+      };
+      this.scanExprForCaptures(mcExpr.object, scopeVarNames, capturedNames);
+      for (let i = 0; i < mcExpr.args.length; i++) {
+        this.scanExprForCaptures(mcExpr.args[i], scopeVarNames, capturedNames);
+      }
+    } else if (etype === "member_access") {
+      const maExpr = expr as { type: string; object: Expression };
+      this.scanExprForCaptures(maExpr.object, scopeVarNames, capturedNames);
+    } else if (etype === "index_access") {
+      const iaExpr = expr as { type: string; object: Expression; index: Expression };
+      this.scanExprForCaptures(iaExpr.object, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(iaExpr.index, scopeVarNames, capturedNames);
+    } else if (etype === "array") {
+      const arrExpr = expr as { type: string; elements: Expression[] };
+      for (let i = 0; i < arrExpr.elements.length; i++) {
+        this.scanExprForCaptures(arrExpr.elements[i], scopeVarNames, capturedNames);
+      }
+    } else if (etype === "object") {
+      // ObjectProperty: { key: string; value: Expression } — must use named type so the
+      // native compiler generates a 2-field struct for GEP; anonymous inline types produce
+      // a 1-field struct and would read key instead of value.
+      const objExpr = expr as { type: string; properties: ObjectProperty[] };
+      for (let i = 0; i < objExpr.properties.length; i++) {
+        const prop = objExpr.properties[i] as ObjectProperty;
+        this.scanExprForCaptures(prop.value, scopeVarNames, capturedNames);
+      }
+    } else if (etype === "template_literal") {
+      const tlExpr = expr as { type: string; parts: (string | Expression)[] };
+      for (let i = 0; i < tlExpr.parts.length; i++) {
+        const part = tlExpr.parts[i];
+        // Raw string segments have no .type; Expression nodes do.
+        const partTyped = part as { type: string };
+        if (partTyped.type) {
+          this.scanExprForCaptures(part as Expression, scopeVarNames, capturedNames);
+        }
+      }
+    } else if (etype === "conditional") {
+      const condExpr = expr as {
+        type: string;
+        condition: Expression;
+        consequent: Expression;
+        alternate: Expression;
+      };
+      this.scanExprForCaptures(condExpr.condition, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(condExpr.consequent, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(condExpr.alternate, scopeVarNames, capturedNames);
+    } else if (etype === "await") {
+      const awaitExpr = expr as { type: string; argument: Expression };
+      this.scanExprForCaptures(awaitExpr.argument, scopeVarNames, capturedNames);
+    } else if (etype === "new") {
+      // NewNode: { type, className, args } — must include className to get correct GEP index for args
+      const newExpr = expr as { type: string; className: string; args: Expression[] };
+      for (let i = 0; i < newExpr.args.length; i++) {
+        this.scanExprForCaptures(newExpr.args[i], scopeVarNames, capturedNames);
+      }
+    } else if (etype === "type_assertion") {
+      const taExpr = expr as { type: string; expression: Expression };
+      this.scanExprForCaptures(taExpr.expression, scopeVarNames, capturedNames);
+    } else if (etype === "spread_element") {
+      const seExpr = expr as { type: string; argument: Expression };
+      this.scanExprForCaptures(seExpr.argument, scopeVarNames, capturedNames);
+    } else if (etype === "member_access_assignment") {
+      // MemberAccessAssignmentNode: { type, object, property, value } — must include property for correct GEP index
+      const maaExpr = expr as {
+        type: string;
+        object: Expression;
+        property: string;
+        value: Expression;
+      };
+      this.scanExprForCaptures(maaExpr.object, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(maaExpr.value, scopeVarNames, capturedNames);
+    } else if (etype === "index_access_assignment") {
+      const iaaExpr = expr as {
+        type: string;
+        object: Expression;
+        index: Expression;
+        value: Expression;
+      };
+      this.scanExprForCaptures(iaaExpr.object, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(iaaExpr.index, scopeVarNames, capturedNames);
+      this.scanExprForCaptures(iaaExpr.value, scopeVarNames, capturedNames);
+    } else if (etype === "map") {
+      // Same issue as object: must use named MapEntry type for correct 2-field GEP.
+      const mapExpr = expr as { type: string; entries: MapEntry[] };
+      for (let i = 0; i < mapExpr.entries.length; i++) {
+        const entry = mapExpr.entries[i] as MapEntry;
+        this.scanExprForCaptures(entry.key, scopeVarNames, capturedNames);
+        this.scanExprForCaptures(entry.value, scopeVarNames, capturedNames);
+      }
+    } else if (etype === "set") {
+      const setExpr = expr as { type: string; values: Expression[] };
+      for (let i = 0; i < setExpr.values.length; i++) {
+        this.scanExprForCaptures(setExpr.values[i], scopeVarNames, capturedNames);
+      }
+    }
+    // Leaves: variable, number, string, boolean, null, undefined, regex, this, super — no sub-expressions. Any other node type not listed above is also skipped without scanning.
+  }
+  // Prints "file:line:col: error: ..." (column is loc.column + 1) or a location-less message, then exits 1.
+  private reportError(varName: string, loc?: SourceLocation): void {
+    let msg = "";
+    if (loc !== null && loc !== undefined) {
+      const file = loc.file || "";
+      msg +=
+        file +
+        ":" +
+        loc.line +
+        ":" +
+        (loc.column + 1) +
+        ": error: variable '" +
+        varName +
+        "' is captured by a closure but reassigned after capture\n";
+    } else {
+      msg +=
+        "error: variable '" + varName + "' is captured by a closure but reassigned after capture\n";
+    }
+    msg += " note: ChadScript closures capture by value; the closure will not see this change\n";
+    console.error(msg);
+    process.exit(1); // first violation aborts compilation; later violations are never reported
+  }
+}
diff --git a/src/semantic/union-type-checker.ts b/src/semantic/union-type-checker.ts
new file mode 100644
index 00000000..8ab0408e
--- /dev/null
+++ b/src/semantic/union-type-checker.ts
@@ -0,0 +1,136 @@
+// Union type checker — semantic pass that rejects unsafe union type aliases
+// used as function/method parameter types.
+//
+// The existing checkUnsafeUnionType (called by SemanticAnalyzer) catches inline
+// unions with different LLVM representations (e.g., `string | number`). But type
+// alias unions like `type Mixed = string | number` bypass that check because the
+// parameter type string is just "Mixed" (no " | " to split on).
+//
+// This pass resolves type aliases and checks whether their members would map to
+// different LLVM types. When they do, the codegen emits the alias name literally
+// as the LLVM param type, which defaults to i8* — causing a segfault if the
+// caller passes a value with a different representation (e.g., double for number).
+
+import type { AST, SourceLocation } from "../ast/types.js";
+import { tsTypeToLlvm } from "../codegen/infrastructure/type-system.js";
+// Entry point of the pass: indexes unsafe aliases, then checks function and method parameter types.
+export function checkUnionTypes(ast: AST): void {
+  const checker = new UnionTypeChecker(ast);
+  checker.check(); // does not return on a violation: reportError() calls process.exit(1)
+}
+
+class UnionTypeChecker {
+  private ast: AST;
+  // Names of type aliases whose union members have different LLVM representations
+  private unsafeAliases: string[];
+  // Builds the unsafe-alias index eagerly, so check() only has to do name lookups.
+  constructor(ast: AST) {
+    this.ast = ast;
+    this.unsafeAliases = [];
+    this.buildUnsafeAliasIndex();
+  }
+
+  // Pre-compute which type alias names resolve to unions with mixed LLVM types.
+  private buildUnsafeAliasIndex(): void {
+    if (!this.ast.typeAliases) return;
+    for (let i = 0; i < this.ast.typeAliases.length; i++) {
+      const alias = this.ast.typeAliases[i];
+      const members = alias.unionMembers;
+      if (!members || members.length < 2) continue; // not a union of at least two members
+
+      // Collect LLVM types for non-null members
+      const llvmTypes: string[] = [];
+      for (let j = 0; j < members.length; j++) {
+        const m = members[j].trim();
+        if (m === "null" || m === "undefined") continue; // ignored when comparing representations
+        llvmTypes.push(tsTypeToLlvm(m));
+      }
+      if (llvmTypes.length < 2) continue; // e.g. `T | null` leaves a single member to compare
+
+      // Check if any member has a different LLVM type than the first
+      let hasMixed = false;
+      for (let j = 1; j < llvmTypes.length; j++) {
+        if (llvmTypes[j] !== llvmTypes[0]) {
+          hasMixed = true;
+          break;
+        }
+      }
+      if (hasMixed) {
+        this.unsafeAliases.push(alias.name);
+      }
+    }
+  }
+  // True when typeName, with at most one trailing "[]" stripped, is a recorded unsafe alias.
+  private isUnsafeAlias(typeName: string): boolean {
+    let name = typeName;
+    if (name.endsWith("[]")) {
+      name = name.substring(0, name.length - 2);
+    }
+    return this.unsafeAliases.indexOf(name) !== -1;
+  }
+  // Checks parameter types only; the process exits at the first unsafe alias found.
+  check(): void {
+    // Check standalone function parameters
+    for (let i = 0; i < this.ast.functions.length; i++) {
+      const fn = this.ast.functions[i];
+      if (fn.declare) continue; // `declare` functions are not checked
+      this.checkParams(fn.name, fn.paramTypes, fn as { loc?: SourceLocation });
+    }
+
+    // Check class method parameters
+    for (let i = 0; i < this.ast.classes.length; i++) {
+      const cls = this.ast.classes[i];
+      for (let j = 0; j < cls.methods.length; j++) {
+        const method = cls.methods[j];
+        const qualName = cls.name + "." + method.name;
+        this.checkParams(qualName, method.paramTypes, method as { loc?: SourceLocation });
+      }
+    }
+  }
+  // Reports the first parameter type of funcName that is an unsafe alias; no-op without paramTypes.
+  private checkParams(
+    funcName: string,
+    paramTypes: string[] | undefined,
+    locHolder: { loc?: SourceLocation },
+  ): void {
+    if (!paramTypes) return;
+    for (let i = 0; i < paramTypes.length; i++) {
+      if (this.isUnsafeAlias(paramTypes[i])) {
+        this.reportError(funcName, paramTypes[i], locHolder.loc);
+      }
+    }
+  }
+  // Prints the error plus two notes, then exits 1. aliasName is the parameter type as written.
+  private reportError(funcName: string, aliasName: string, loc: SourceLocation | undefined): void {
+    let msg = "";
+    if (loc !== null && loc !== undefined) {
+      const file = loc.file || "";
+      msg +=
+        file +
+        ":" +
+        loc.line +
+        ":" +
+        (loc.column + 1) +
+        ": error: in function '" +
+        funcName +
+        "', parameter type '" +
+        aliasName +
+        "' is a union type alias with mixed representations\n";
+    } else {
+      msg +=
+        "error: in function '" +
+        funcName +
+        "', parameter type '" +
+        aliasName +
+        "' is a union type alias with mixed representations\n";
+    }
+    msg +=
+      " note: '" +
+      aliasName +
+      "' is a type alias for a union whose members have different native types (e.g., i8* vs double)\n";
+    msg +=
+      " note: this will be miscompiled and segfault at runtime. Use a common base interface or separate the types.\n";
+    console.error(msg);
+    process.exit(1); // first violation aborts compilation; later violations are never reported
+  }
+}
diff --git a/tests/fixtures/closures/closure-capture-by-value-ok.ts b/tests/fixtures/closures/closure-capture-by-value-ok.ts
new file mode 100644
index 00000000..b83cdbc7
--- /dev/null
+++ b/tests/fixtures/closures/closure-capture-by-value-ok.ts
@@ -0,0 +1,13 @@
+// @test-description: closure captures variable with no post-capture mutation — valid
+// Reassignment is fine as long as it happens BEFORE the closure is created.
+// The checker only errors on reassignment AFTER capture.
+function runTest(): void {
+  let threshold = 3;
+  threshold = 5; // reassigned before any closure captures it — no error
+  const nums = [1, 2, 3, 4, 5, 6];
+  const big = nums.filter((x) => x > threshold); // captures threshold (value 5)
+  if (big.length === 1 && big[0] === 6) { // only 6 is greater than the captured value 5
+    console.log("TEST_PASSED");
+  }
+}
+runTest();
diff --git a/tests/fixtures/closures/closure-capture-mutation-error.ts b/tests/fixtures/closures/closure-capture-mutation-error.ts
new file mode 100644
index 00000000..b1fef8c1
--- /dev/null
+++ b/tests/fixtures/closures/closure-capture-mutation-error.ts
@@ -0,0 +1,6 @@
+// @test-compile-error: variable 'x' is captured by a closure but reassigned after capture
+// @test-description: reassigning a captured variable is a compile error
+let x = 1;
+const f = () => console.log(x); // the arrow function references x, so x is captured here
+x = 2; // reassignment after the capture — the line the expected compile error refers to
+f();
diff --git a/tests/fixtures/types/union-non-nullable-error.ts b/tests/fixtures/types/union-non-nullable-error.ts
new file mode 100644
index 00000000..353a419d
--- /dev/null
+++ b/tests/fixtures/types/union-non-nullable-error.ts
@@ -0,0 +1,6 @@
+// @test-compile-error: has members with different native representations
+// @test-description: inline union types with different representations are a compile error
+function process(x: string | number): void { // inline union parameter — expected to be rejected
+  console.log("unreachable");
+}
+process("hello");
diff --git a/tests/fixtures/types/union-type-alias-error.ts b/tests/fixtures/types/union-type-alias-error.ts
new file mode 100644
index 00000000..f14aa1b0
--- /dev/null
+++ b/tests/fixtures/types/union-type-alias-error.ts
@@ -0,0 +1,9 @@
+// @test-compile-error: union type alias with mixed representations
+// @test-description: type alias unions with mixed representations are a compile error
+type StringOrNumber = string | number;
+
+function display(val: StringOrNumber): void { // parameter typed with the alias — expected to be rejected
+  console.log("value");
+}
+
+display("hello");