From 765c6c876344e19372f5c8ae571b420b6305306f Mon Sep 17 00:00:00 2001 From: djstrong Date: Tue, 17 Feb 2026 22:46:31 +0100 Subject: [PATCH 1/5] chore: update .gitignore and modify CSV conversion tests to enforce single-column format only - Updated tests in cli.test.ts and convert-csv-command.test.ts to use single-column CSV files. - Refactored convert-csv-command.ts to reject multi-column CSV formats and adjusted documentation accordingly. --- .gitignore | 3 + apps/ensrainbow/src/cli.test.ts | 6 +- .../src/commands/convert-csv-command.test.ts | 113 ++++-------------- .../src/commands/convert-csv-command.ts | 66 +++------- .../fixtures/test_labels_invalid_hash.csv | 4 - .../ensrainbow/concepts/creating-files.mdx | 16 +-- 6 files changed, 49 insertions(+), 159 deletions(-) delete mode 100644 apps/ensrainbow/test/fixtures/test_labels_invalid_hash.csv diff --git a/.gitignore b/.gitignore index 481476f2d..8a712d295 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,6 @@ apps/ensrainbow/data # fallback-ensapi dist apps/fallback-ensapi/dist +apps/ensrainbow/data* +apps/ensrainbow/temp* +apps/ensrainbow/v2* diff --git a/apps/ensrainbow/src/cli.test.ts b/apps/ensrainbow/src/cli.test.ts index 058b33fd1..958206e28 100644 --- a/apps/ensrainbow/src/cli.test.ts +++ b/apps/ensrainbow/src/cli.test.ts @@ -277,7 +277,7 @@ describe("CLI", () => { // Successful convert with args (convert-sql always creates version 0) // To test version 1, we need to use convert command with existing database // But for this test, we'll create version 0 and then manually test the ingestion failure - const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); + const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); const tempDbDirForV1 = join(tempDir, "temp-db-for-v1"); const version0FileForV1 = join(tempDir, "test_ens_names_0_for_v1.ensrainbow"); @@ -340,7 +340,7 @@ describe("CLI", () => { // Create an ensrainbow file with label set version 2 // To create version 2, we need to create version 0, ingest it, create version 1, ingest it, then create version 2 - const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); + const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); const labelSetId = "test-ens-names"; // Create temporary directory for building up versions sequentially @@ -453,7 +453,7 @@ describe("CLI", () => { const thirdInputFile = join(tempDir, "different_label_set_id_1.ensrainbow"); // Create an ensrainbow file with different label set id - const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); + const csvInputFile = join(TEST_FIXTURES_DIR, "test_labels_1col.csv"); const labelSetId = "different-label-set-id"; // Different from test-ens-names // Create temporary directory for version 0 database diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 9825d396d..f1910eb2c 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -63,51 +63,33 @@ describe("convert-csv-command", () => { await db.close(); }); - it("should convert two column CSV with provided hashes and ingest successfully", async () => { + it("should reject two-column CSV (multi-column formats are not supported)", async () => { const inputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); const outputFile = join(tempDir, "output_2col.ensrainbow"); - const dataDir = join(tempDir, "db_2col"); - // Convert CSV to ensrainbow format - await convertCsvCommand({ - inputFile, - outputFile, - labelSetId: "test-csv-two-col" as LabelSetId, - silent: true, - }); - - // Verify the output file was created - const stats = await stat(outputFile); - expect(stats.isFile()).toBe(true); - expect(stats.size).toBeGreaterThan(0); - - // Ingest the converted file into database - const cli = createCLI({ exitProcess: false }); - await cli.parse(["ingest-ensrainbow", "--input-file", outputFile, "--data-dir", dataDir]); - - const db = await ENSRainbowDB.open(dataDir); - expect(await db.validate()).toBe(true); - const recordsCount = await db.getPrecalculatedRainbowRecordCount(); - expect(recordsCount).toBe(10); - expect( - (await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("test123"))))?.label, - ).toBe("test123"); - expect(await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234")))).toBe(null); - await db.close(); + await expect( + convertCsvCommand({ + inputFile, + outputFile, + labelSetId: "test-csv-two-col" as LabelSetId, + silent: true, + }), + ).rejects.toThrow( + /Expected 1 column \(label only\).*Multi-column CSV formats are not supported/, + ); }); - it("should fail when CSV has inconsistent column count", async () => { + it("should reject CSV with more than one column", async () => { const inputFile = join(TEST_FIXTURES_DIR, "test_labels_invalid_first.csv"); const outputFile = join(tempDir, "output_invalid.ensrainbow"); - // Convert CSV to ensrainbow format (should fail on inconsistent columns) await expect( convertCsvCommand({ inputFile, outputFile, labelSetId: "test-csv-invalid" as LabelSetId, }), - ).rejects.toThrow(/Failed on line 1: Expected 1 or 2 col/); + ).rejects.toThrow(/Expected 1 column \(label only\)/); }); it("should handle CSV with special characters, emojis, unicode, and quoted fields", async () => { @@ -150,20 +132,6 @@ describe("convert-csv-command", () => { expect(await db.getVersionedRainbowRecord(labelHashToBytes(labelhash("1234")))).toBe(null); await db.close(); }); - - it("should fail when CSV contains invalid labelhash format", async () => { - const inputFile = join(TEST_FIXTURES_DIR, "test_labels_invalid_hash.csv"); - const outputFile = join(tempDir, "output_invalid_hash.ensrainbow"); - - // Convert CSV to ensrainbow format (should fail on invalid hash format) - await expect( - convertCsvCommand({ - inputFile, - outputFile, - labelSetId: "test-csv-invalid-hash" as LabelSetId, - }), - ).rejects.toThrow(/Failed on line 2: Invalid labelHash/); - }); }); describe("Error handling", () => { @@ -562,22 +530,8 @@ describe("convert-csv-command", () => { it("should process all CSV rows including potential headers", async () => { const inputFile = join(tempDir, "with_header.csv"); const outputFile = join(tempDir, "output_header.ensrainbow"); - const csvContent = - "label,labelhash\nalice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501\nbob,0x38e47a7b719dce63662aeaf43440326f551b8a7ee198cee35cb5d517f2d296a2"; - await writeFile(inputFile, csvContent); - - // Should process the file (header will be treated as a regular row and fail validation) - // Actually, the header row will be processed and fail because "label" is not a valid hex hash - await expect( - convertCsvCommand({ - inputFile, - outputFile, - labelSetId: "test-header" as LabelSetId, - silent: true, - }), - ).rejects.toThrow(/Invalid labelHash/); - // For a proper test, let's create a CSV where the header is valid data + // Single-column CSV where the header is valid data const csvContentValid = "label\nlabel1\nlabel2"; await writeFile(inputFile, csvContentValid); @@ -603,14 +557,14 @@ describe("convert-csv-command", () => { await db.close(); }); - it("should handle CSV with malformed rows (extra columns)", async () => { + it("should reject CSV rows with extra columns", async () => { const inputFile = join(tempDir, "malformed_extra_cols.csv"); const outputFile = join(tempDir, "output_malformed.ensrainbow"); const csvContent = "alice\nbob,0x38e47a7b719dce63662aeaf43440326f551b8a7ee198cee35cb5d517f2d296a2,extra\ncharlie"; await writeFile(inputFile, csvContent); - // Should fail when column count is inconsistent + // Should fail because second row has more than 1 column await expect( convertCsvCommand({ inputFile, @@ -618,25 +572,7 @@ describe("convert-csv-command", () => { labelSetId: "test-malformed" as LabelSetId, silent: true, }), - ).rejects.toThrow(/Expected \d+ columns/); - }); - - it("should handle CSV with malformed rows (missing columns)", async () => { - const inputFile = join(tempDir, "malformed_missing_cols.csv"); - const outputFile = join(tempDir, "output_malformed2.ensrainbow"); - const csvContent = - "alice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501\nbob\ncharlie,0x87a213ce1ee769e28decedefb98f6fe48890a74ba84957ebf877fb591e37e0de"; - await writeFile(inputFile, csvContent); - - // Should fail when column count is inconsistent - await expect( - convertCsvCommand({ - inputFile, - outputFile, - labelSetId: "test-malformed2" as LabelSetId, - silent: true, - }), - ).rejects.toThrow(/Expected \d+ columns/); + ).rejects.toThrow(/Expected 1 column \(label only\)/); }); it("should handle CSV with quoted fields containing commas", async () => { @@ -683,22 +619,23 @@ describe("convert-csv-command", () => { await db.close(); }); - it("should handle CSV with empty labelhash column (should fail validation)", async () => { - const inputFile = join(tempDir, "empty_hash.csv"); - const outputFile = join(tempDir, "output_empty_hash.ensrainbow"); + it("should reject CSV with two columns (label + labelhash not supported)", async () => { + const inputFile = join(tempDir, "two_columns.csv"); + const outputFile = join(tempDir, "output_two_columns.ensrainbow"); const csvContent = - "alice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501\nbob,\ncharlie,0x87a213ce1ee769e28decedefb98f6fe48890a74ba84957ebf877fb591e37e0de"; + "alice,0x9c0257114eb9399a2985f8e75dad7600c5d89fe3824ffa99ec1c3eb8bf3b0501\nbob,0x38e47a7b719dce63662aeaf43440326f551b8a7ee198cee35cb5d517f2d296a2"; await writeFile(inputFile, csvContent); - // Should fail when labelhash is empty await expect( convertCsvCommand({ inputFile, outputFile, - labelSetId: "test-empty-hash" as LabelSetId, + labelSetId: "test-two-columns" as LabelSetId, silent: true, }), - ).rejects.toThrow(/LabelHash cannot be empty/); + ).rejects.toThrow( + /Expected 1 column \(label only\).*Multi-column CSV formats are not supported/, + ); }); }); }); diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 68f3e4e1f..bab18e9e0 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -1,8 +1,7 @@ /** * ENSRAINBOW CSV FILE CREATION COMMAND * - * Converts CSV files to .ensrainbow format with fast-csv - * Supports 1-column (label only) and 2-column (label,labelhash) formats + * Converts single-column CSV files (one label per line) to .ensrainbow format with fast-csv */ import { createReadStream, createWriteStream, rmSync, statSync } from "node:fs"; @@ -13,7 +12,7 @@ import { ClassicLevel } from "classic-level"; import ProgressBar from "progress"; import { labelhash } from "viem"; -import { type LabelHash, type LabelSetId, labelHashToBytes } from "@ensnode/ensnode-sdk"; +import { type LabelSetId, labelHashToBytes } from "@ensnode/ensnode-sdk"; import { ENSRainbowDB } from "../lib/database.js"; import { logger } from "../utils/logger.js"; @@ -313,39 +312,22 @@ async function initializeConversion( } /** - * Create rainbow record from parsed CSV row + * Create rainbow record from a single-column CSV row (label only). + * Labelhashes are always computed deterministically from labels. */ function createRainbowRecord(row: string[]): RainbowRecord { - const label = String(row[0]); - - if (row.length === 1) { - // Single column: compute labelhash using labelhash function - const labelHashBytes = labelHashToBytes(labelhash(label)); - return { - labelHash: labelHashBytes, - label: label, - }; - } else if (row.length === 2) { - // Two columns: validate labelhash format and use provided hash - // Trim whitespace from hash (metadata), but preserve label as-is - const providedHash = String(row[1]).trim(); - if (providedHash === "") { - throw new Error("LabelHash cannot be empty"); - } - const maybeLabelHash = providedHash.startsWith("0x") ? providedHash : `0x${providedHash}`; - try { - const labelHash = labelHashToBytes(maybeLabelHash as LabelHash); // performs labelhash format validation - return { - labelHash, - label, - }; - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error); - throw new Error(`Invalid labelHash: ${errorMessage}`); - } - } else { - throw new Error(`Expected 1 or 2 columns, but found ${row.length} columns`); + if (row.length !== 1) { + throw new Error( + `Expected 1 column (label only), but found ${row.length} columns. Multi-column CSV formats are not supported.`, + ); } + + const label = String(row[0]); + const labelHashBytes = labelHashToBytes(labelhash(label)); + return { + labelHash: labelHashBytes, + label, + }; } /** @@ -353,21 +335,12 @@ function createRainbowRecord(row: string[]): RainbowRecord { */ async function processRecord( row: string[], - expectedColumns: number, RainbowRecordType: any, outputStream: NodeJS.WritableStream, - lineNumber: number, existingDb: ENSRainbowDB | null, dedupDb: DeduplicationDB, stats: ConversionStats, ): Promise { - // Validate column count - if (row.length !== expectedColumns) { - throw new Error( - `Expected ${expectedColumns} columns, but found ${row.length} in line ${lineNumber}`, - ); - } - const rainbowRecord = createRainbowRecord(row); const label = rainbowRecord.label; const labelHashBytes = Buffer.from(rainbowRecord.labelHash); @@ -427,7 +400,6 @@ async function processCSVFile( stats: ConversionStats, progressBar: ProgressBar | null, ): Promise<{ totalLines: number; processedRecords: number }> { - let expectedColumns: number | null = null; let lineNumber = 0; let processedRecords = 0; let lastLoggedLine = 0; @@ -465,12 +437,6 @@ async function processCSVFile( return; } - // Detect column count on first non-empty row - if (expectedColumns === null) { - expectedColumns = row.length; - logger.info(`Detected ${expectedColumns} columns - SEQUENTIAL processing mode`); - } - // Log progress (less frequently to avoid logger crashes) if (lineNumber % progressInterval === 0 && lineNumber !== lastLoggedLine) { const currentTime = Date.now(); @@ -496,10 +462,8 @@ async function processCSVFile( // Process this one record const wasProcessed = await processRecord( row, - expectedColumns, RainbowRecordType, outputStream, - lineNumber, existingDb, dedupDb, stats, diff --git a/apps/ensrainbow/test/fixtures/test_labels_invalid_hash.csv b/apps/ensrainbow/test/fixtures/test_labels_invalid_hash.csv deleted file mode 100644 index 484983db9..000000000 --- a/apps/ensrainbow/test/fixtures/test_labels_invalid_hash.csv +++ /dev/null @@ -1,4 +0,0 @@ -validlabel,0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef -invalidhash,not-a-hex-hash -anotherlabel,0x123 -toolong,0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef123456789 diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index 7435ae665..a6bb026fb 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -81,27 +81,17 @@ pnpm run convert \ - `--existing-db-path`: Path to existing ENSRainbow database to filter out existing labels from the generated ensrainbow file and determine the next label set version - `--silent`: Disable progress bar (useful for scripts and automated workflows) -### CSV Format Support +### CSV Format -The CSV converter supports two formats and expects CSV files **without a header row**. +The CSV converter expects a single-column CSV file with one label per line, **without a header row**. -#### Single Column Format (Label Only) ```csv ethereum vitalik ens ``` -The converter automatically computes labelhashes using the `labelhash()` function. - -#### Two Column Format (Label + Labelhash) -```csv -ethereum,0x541111248b45b7a8dc3f5579f630e74cb01456ea6ac067d3f4d793245a255155 -vitalik,0xaf2caa1c2ca1d027f1ac823b529d0a67cd144264b2789fa2ea4d63a67c7103cc -ens,0x5cee339e13375638553bdf5a6e36ba80fb9f6a4f0783680884d92b558aa471da -``` - -The converter validates the format of provided labelhashes (does not verify they match the label). +The converter automatically computes labelhashes from labels using the `labelhash()` function. This ensures all label-to-labelhash mappings are deterministically correct. ### Label Filtering From cabb653705ce2c86d19a7d4e6c115ccc61cba49b Mon Sep 17 00:00:00 2001 From: djstrong Date: Tue, 17 Feb 2026 22:57:04 +0100 Subject: [PATCH 2/5] add changeset --- .changeset/single-column-csv.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/single-column-csv.md diff --git a/.changeset/single-column-csv.md b/.changeset/single-column-csv.md new file mode 100644 index 000000000..8cfb9cb26 --- /dev/null +++ b/.changeset/single-column-csv.md @@ -0,0 +1,5 @@ +--- +"ensrainbow": major +--- + +Constrain CSV input to single-column format (label only). The two-column format (label + labelhash) is no longer supported. All labelhashes are now computed deterministically from labels, removing the risk of incorrect mappings from untrusted labelhash values. From 5004fd6c216019e9d2cb544ce5a738e89dba05eb Mon Sep 17 00:00:00 2001 From: djstrong Date: Tue, 17 Feb 2026 23:07:55 +0100 Subject: [PATCH 3/5] refactor(tests): remove redundant test for two-column CSV format - Removed the test case that checked for rejection of two-column CSV files, as it is now redundant with the existing test for multi-column CSV formats. - Simplified the label extraction in the CSV conversion function by removing unnecessary string conversion. --- .../src/commands/convert-csv-command.test.ts | 16 ---------------- .../src/commands/convert-csv-command.ts | 2 +- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index f1910eb2c..1cc73264a 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -63,22 +63,6 @@ describe("convert-csv-command", () => { await db.close(); }); - it("should reject two-column CSV (multi-column formats are not supported)", async () => { - const inputFile = join(TEST_FIXTURES_DIR, "test_labels_2col.csv"); - const outputFile = join(tempDir, "output_2col.ensrainbow"); - - await expect( - convertCsvCommand({ - inputFile, - outputFile, - labelSetId: "test-csv-two-col" as LabelSetId, - silent: true, - }), - ).rejects.toThrow( - /Expected 1 column \(label only\).*Multi-column CSV formats are not supported/, - ); - }); - it("should reject CSV with more than one column", async () => { const inputFile = join(TEST_FIXTURES_DIR, "test_labels_invalid_first.csv"); const outputFile = join(tempDir, "output_invalid.ensrainbow"); diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index bab18e9e0..5c7f75f7a 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -322,7 +322,7 @@ function createRainbowRecord(row: string[]): RainbowRecord { ); } - const label = String(row[0]); + const label = row[0]; const labelHashBytes = labelHashToBytes(labelhash(label)); return { labelHash: labelHashBytes, From d1976942200cc4d1f2ba87f60b0a681b4dfe9e09 Mon Sep 17 00:00:00 2001 From: djstrong Date: Tue, 17 Feb 2026 23:18:10 +0100 Subject: [PATCH 4/5] update documentation for CSV conversion to specify single-column format - Clarified that the `convert` command processes single-column CSV files with one label per line. - Updated related sections in the FAQ and creating files documentation to reflect this change. --- .../ensrainbow/concepts/creating-files.mdx | 20 ++++++++----------- .../src/content/docs/ensrainbow/faq.mdx | 2 +- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx index a6bb026fb..5505f2b9c 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/creating-files.mdx @@ -41,7 +41,7 @@ For detailed information about the file format structure, see the [Data Model](/ | Method | Input Format | Use Case | Command | |--------|-------------|----------|---------| -| **CSV Conversion** | CSV file (1 or 2 columns) | Building new ENS rainbow tables | `pnpm run convert` | +| **CSV Conversion** | Single-column CSV file (one label per line) | Building new ENS rainbow tables | `pnpm run convert` | | **SQL Conversion** | Gzipped SQL dump (`ens_names.sql.gz`) | Converting legacy ENS Subgraph data | `pnpm run convert-sql` | ### When to Use CSV Conversion @@ -55,7 +55,7 @@ For detailed information about the file format structure, see the [Data Model](/ ## Method 1: Converting from CSV Files -The `convert` command processes CSV files with flexible column formats. +The `convert` command processes single-column CSV files with one label per line. ### Command Syntax @@ -151,16 +151,12 @@ pnpm run convert \ ### How It Works -1. **Detects** CSV format automatically (1 or 2 columns) -2. **Streams** CSV parsing using fast-csv for memory efficiency -3. **Validates** column count and data format -4. **Computes** or validates labelhashes as needed - - For single-column format: Computes labelhash using the `labelhash()` function - - For two-column format: Validates the format of the provided labelhash (does not verify it matches the label) - - Invalid labelhashes are rejected if they don't meet format requirements (66 characters including "0x" prefix, lowercase hex, valid hex format) -5. **Filters** existing labels if `--existing-db-path` is provided -6. **Filters** duplicate labels within the same CSV file -7. **Writes** .ensrainbow file as output +1. **Streams** CSV parsing using fast-csv for memory efficiency +2. **Validates** that each row contains exactly one column (the label) +3. **Computes** labelhashes deterministically from labels using the `labelhash()` function +4. **Filters** existing labels if `--existing-db-path` is provided +5. **Filters** duplicate labels within the same CSV file +6. **Writes** .ensrainbow file as output ## Method 2: Migrating from ENS Subgraph diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx index ad63f4466..c37deb5e6 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/faq.mdx @@ -24,7 +24,7 @@ To stay informed about new versions, monitor the [Available Label Sets](/ensrain You can create your own `.ensrainbow` files from CSV data using the `convert` command, which generates new `.ensrainbow` files from your supplied CSV input. -1. **Prepare your data** as a CSV file with labels (1 column) or labels and labelhashes (2 columns) +1. **Prepare your data** as a single-column CSV file with one label per line 2. **Run the convert command:** ```bash pnpm run convert \ From c865e21a31a16de36360dd6a4e487a91a825ea70 Mon Sep 17 00:00:00 2001 From: djstrong Date: Wed, 18 Feb 2026 00:14:50 +0100 Subject: [PATCH 5/5] refactor(tests): remove test for invalid multi-column CSV and enhance error handling - Removed the test case for rejecting multi-column CSV files, as it is now redundant. - Improved error handling in the CSV conversion function to ensure proper cleanup of output streams on error. - Updated the import statement to include the WriteStream type for better type safety. --- .../src/commands/convert-csv-command.test.ts | 13 ------- .../src/commands/convert-csv-command.ts | 35 ++++++++++++++++--- .../fixtures/test_labels_invalid_first.csv | 3 -- 3 files changed, 31 insertions(+), 20 deletions(-) delete mode 100644 apps/ensrainbow/test/fixtures/test_labels_invalid_first.csv diff --git a/apps/ensrainbow/src/commands/convert-csv-command.test.ts b/apps/ensrainbow/src/commands/convert-csv-command.test.ts index 1cc73264a..2f8f46ddb 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.test.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.test.ts @@ -63,19 +63,6 @@ describe("convert-csv-command", () => { await db.close(); }); - it("should reject CSV with more than one column", async () => { - const inputFile = join(TEST_FIXTURES_DIR, "test_labels_invalid_first.csv"); - const outputFile = join(tempDir, "output_invalid.ensrainbow"); - - await expect( - convertCsvCommand({ - inputFile, - outputFile, - labelSetId: "test-csv-invalid" as LabelSetId, - }), - ).rejects.toThrow(/Expected 1 column \(label only\)/); - }); - it("should handle CSV with special characters, emojis, unicode, and quoted fields", async () => { const inputFile = join(TEST_FIXTURES_DIR, "test_labels_special_chars.csv"); const outputFile = join(tempDir, "output_special.ensrainbow"); diff --git a/apps/ensrainbow/src/commands/convert-csv-command.ts b/apps/ensrainbow/src/commands/convert-csv-command.ts index 5c7f75f7a..59ea2f215 100644 --- a/apps/ensrainbow/src/commands/convert-csv-command.ts +++ b/apps/ensrainbow/src/commands/convert-csv-command.ts @@ -4,7 +4,7 @@ * Converts single-column CSV files (one label per line) to .ensrainbow format with fast-csv */ -import { createReadStream, createWriteStream, rmSync, statSync } from "node:fs"; +import { createReadStream, createWriteStream, rmSync, statSync, type WriteStream } from "node:fs"; import { dirname, join } from "node:path"; import { parse } from "@fast-csv/parse"; @@ -54,8 +54,18 @@ class DeduplicationDB { try { await this.db.get(key); return true; - } catch (_error) { - return false; + } catch (error: unknown) { + // Only treat a missing-key error as "not found"; + // rethrow I/O, corruption, LEVEL_LOCKED, or other unexpected errors + if ( + error != null && + typeof error === "object" && + "code" in error && + error.code === "LEVEL_NOT_FOUND" + ) { + return false; + } + throw error; } } @@ -534,13 +544,15 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom let dedupDb: DeduplicationDB | undefined; let tempDb: ClassicLevel | undefined; let temporaryDedupDir: string | null = null; + let outputStream: WriteStream | null = null; try { const { RainbowRecordType, - outputStream, + outputStream: stream, existingDb: db, } = await initializeConversion(options, labelSetVersion, outputFile, existingDb); + outputStream = stream; existingDb = db; // Create temporary deduplication database in the same directory as the output file @@ -599,6 +611,21 @@ export async function convertCsvCommand(options: ConvertCsvCommandOptions): Prom logger.error(`❌ CSV conversion failed: ${errorMessage}`); throw error; } finally { + // Clean up output stream if it wasn't gracefully ended (error path). + // After end(), writable is false, so this only triggers on error paths. + if (outputStream?.writable) { + try { + // Suppress errors from in-flight writes whose async I/O completes + // after destroy (e.g. "Cannot call write after a stream was destroyed"). + // On error paths the output file is incomplete anyway. + outputStream.on("error", () => {}); + outputStream.destroy(); + logger.info("Destroyed output stream on error path"); + } catch (error) { + logger.warn(`Failed to destroy output stream: ${error}`); + } + } + // Clean up deduplication database - close the wrapper first if (dedupDb !== undefined) { try { diff --git a/apps/ensrainbow/test/fixtures/test_labels_invalid_first.csv b/apps/ensrainbow/test/fixtures/test_labels_invalid_first.csv deleted file mode 100644 index 3d0b7b7e0..000000000 --- a/apps/ensrainbow/test/fixtures/test_labels_invalid_first.csv +++ /dev/null @@ -1,3 +0,0 @@ -label1,hash1,extra_column -validlabel -another_valid