# Patch summary (commentary placed before the first "diff --git" header).
#
# Files touched:
#   1. src/utils/__tests__/text-normalization.spec.ts
#      Adds two test cases next to the existing normalizeString tests:
#        - "normalizes middle dot and arrow characters" expects
#            U+00B7 (middle dot)       -> "-"
#            U+2192 (right arrow)      -> "->"
#            U+2194 (left-right arrow) -> "<->"
#            U+2190 (left arrow)       -> "<-"
#        - "strips variation selectors" expects U+FE0F to be removed while the
#          preceding U+26A0 character is kept.
#   2. src/utils/text-normalization.ts
#      Adds five entries to a nested object inside NORMALIZATION_MAPS, after the
#      existing em dash / en dash / non-breaking space entries:
#        "\u00B7": "-", "\u2192": "->", "\u2190": "<-", "\u2194": "<->", "\uFE0F": ""
#      The name of the nested key is outside the visible hunk context.
#
# NOTE(review): in this copy the whole patch sits on one line, while the hunk
# headers (-29,6 +29,17 and -15,6 +15,11) describe multi-line hunks. The line
# breaks and original indentation presumably need to be restored before the
# patch can be applied - confirm against the original patch.
# NOTE(review): only U+FE0F is added to the map; the test name says "variation
# selectors" (plural). Whether other selectors such as U+FE0E should also be
# stripped is not decided by this patch - confirm intent.
diff --git a/src/utils/__tests__/text-normalization.spec.ts b/src/utils/__tests__/text-normalization.spec.ts index e672617d18b..028a379c77c 100644 --- a/src/utils/__tests__/text-normalization.spec.ts +++ b/src/utils/__tests__/text-normalization.spec.ts @@ -29,6 +29,17 @@ describe("Text normalization utilities", () => { const input = "Let\u2019s test this\u2014with some \u201Cfancy\u201D punctuation\u2026 and spaces" expect(normalizeString(input)).toBe('Let\'s test this-with some "fancy" punctuation... and spaces') }) + + it("normalizes middle dot and arrow characters", () => { + expect(normalizeString("migrated \u00B7 19 remaining")).toBe("migrated - 19 remaining") + expect(normalizeString("convert (1\u21922)")).toBe("convert (1->2)") + expect(normalizeString("Anthropic \u2194 OpenAI")).toBe("Anthropic <-> OpenAI") + expect(normalizeString("item \u2190 NEW")).toBe("item <- NEW") + }) + + it("strips variation selectors", () => { + expect(normalizeString("\u26A0\uFE0F Warning")).toBe("\u26A0 Warning") + }) }) describe("unescapeHtmlEntities", () => { diff --git a/src/utils/text-normalization.ts b/src/utils/text-normalization.ts index 9e25d140c4e..297cf675ae2 100644 --- a/src/utils/text-normalization.ts +++ b/src/utils/text-normalization.ts @@ -15,6 +15,11 @@ export const NORMALIZATION_MAPS = { "\u2014": "-", // Em dash "\u2013": "-", // En dash "\u00A0": " ", // Non-breaking space + "\u00B7": "-", // Middle dot + "\u2192": "->", // Right arrow + "\u2190": "<-", // Left arrow + "\u2194": "<->", // Left-right arrow + "\uFE0F": "", // Variation selector (strip it) }, }