From a6c46f7569dde2f0b8991400e1e52c2c2a74215f Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Sat, 7 Feb 2026 10:30:12 -0700 Subject: [PATCH] fix: add missing Unicode chars to text normalization map Add middle dot (U+00B7), arrows (U+2192, U+2190, U+2194), and variation selector (U+FE0F) to NORMALIZATION_MAPS.TYPOGRAPHIC so apply_diff can match files containing these characters. Includes test coverage for all new entries. --- src/utils/__tests__/text-normalization.spec.ts | 11 +++++++++++ src/utils/text-normalization.ts | 5 +++++ 2 files changed, 16 insertions(+) diff --git a/src/utils/__tests__/text-normalization.spec.ts b/src/utils/__tests__/text-normalization.spec.ts index e672617d18b..028a379c77c 100644 --- a/src/utils/__tests__/text-normalization.spec.ts +++ b/src/utils/__tests__/text-normalization.spec.ts @@ -29,6 +29,17 @@ describe("Text normalization utilities", () => { const input = "Let\u2019s test this\u2014with some \u201Cfancy\u201D punctuation\u2026 and spaces" expect(normalizeString(input)).toBe('Let\'s test this-with some "fancy" punctuation... and spaces') }) + + it("normalizes middle dot and arrow characters", () => { + expect(normalizeString("migrated \u00B7 19 remaining")).toBe("migrated - 19 remaining") + expect(normalizeString("convert (1\u21922)")).toBe("convert (1->2)") + expect(normalizeString("Anthropic \u2194 OpenAI")).toBe("Anthropic <-> OpenAI") + expect(normalizeString("item \u2190 NEW")).toBe("item <- NEW") + }) + + it("strips variation selectors", () => { + expect(normalizeString("\u26A0\uFE0F Warning")).toBe("\u26A0 Warning") + }) }) describe("unescapeHtmlEntities", () => { diff --git a/src/utils/text-normalization.ts b/src/utils/text-normalization.ts index 9e25d140c4e..297cf675ae2 100644 --- a/src/utils/text-normalization.ts +++ b/src/utils/text-normalization.ts @@ -15,6 +15,11 @@ export const NORMALIZATION_MAPS = { "\u2014": "-", // Em dash "\u2013": "-", // En dash "\u00A0": " ", // Non-breaking space + "\u00B7": "-", // Middle dot + "\u2192": "->", // Right arrow + "\u2190": "<-", // Left arrow + "\u2194": "<->", // Left-right arrow + "\uFE0F": "", // Variation selector (strip it) }, }