diff --git a/packages/layout-engine/painters/dom/src/features/math/CONTRIBUTING.md b/packages/layout-engine/painters/dom/src/features/math/CONTRIBUTING.md index 8c002d976d..3764f6da11 100644 --- a/packages/layout-engine/painters/dom/src/features/math/CONTRIBUTING.md +++ b/packages/layout-engine/painters/dom/src/features/math/CONTRIBUTING.md @@ -36,7 +36,9 @@ type MathObjectConverter = ( doc: Document, // For creating DOM elements convertChildren: (children: OmmlJsonNode[]) => DocumentFragment, // Recursively converts nested OMML content -) => Element | null; +) => Node | null; // Return a single Element for one atom, or a + // DocumentFragment when your converter produces + // multiple sibling elements (see m:r / math-run). ``` `convertChildren` is the important one. Pass it any child elements that contain nested math content (`m:e`, `m:num`, `m:sub`, etc.). It handles everything inside them, including other math objects. diff --git a/packages/layout-engine/painters/dom/src/features/math/converters/function.ts b/packages/layout-engine/painters/dom/src/features/math/converters/function.ts index 145c183af9..6aa879340b 100644 --- a/packages/layout-engine/painters/dom/src/features/math/converters/function.ts +++ b/packages/layout-engine/painters/dom/src/features/math/converters/function.ts @@ -1,4 +1,5 @@ import type { MathObjectConverter } from '../types.js'; +import { convertMathRunAsFunctionName } from './math-run.js'; const MATHML_NS = 'http://www.w3.org/1998/Math/MathML'; const FUNCTION_APPLY_OPERATOR = '\u2061'; @@ -37,6 +38,56 @@ function forceNormalMathVariant(root: ParentNode): void { } } +/** + * Structural MathML elements whose FIRST child is the "function-name base" + * when nested inside m:fName (e.g. m:limLow → , m:limUpp → , + * m:sSub → , etc.). Word's OMML2MML.XSL keeps the base text whole + * (e.g. "lim" as one ) even though it splits regular runs per-character. + */ +const BASE_BEARING_ELEMENTS = new Set([ + 'munder', + 'mover', + 'munderover', + 'msub', + 'msup', + 'msubsup', + 'mmultiscripts', // m:sPre inside m:fName +]); + +/** + * After per-character splitting in convertMathRun, the base of a nested + * limit/script inside m:fName comes out as multiple single-char siblings + * wrapped in an . Word's XSL keeps that base whole — merge the siblings + * back into a single if they all share the same (or no) mathvariant. + */ +function collapseFunctionNameBases(root: ParentNode): void { + for (const child of Array.from(root.children)) { + if (BASE_BEARING_ELEMENTS.has(child.localName)) { + const base = child.children[0]; + if (base) { + collapseMrowToSingleMi(base); + collapseFunctionNameBases(base); + } + } else { + collapseFunctionNameBases(child); + } + } +} + +function collapseMrowToSingleMi(container: Element): void { + const children = Array.from(container.children); + if (children.length < 2) return; + if (!children.every((c) => c.localName === 'mi')) return; + const variant = children[0]!.getAttribute('mathvariant'); + if (!children.every((c) => c.getAttribute('mathvariant') === variant)) return; + + const merged = container.ownerDocument!.createElementNS(MATHML_NS, 'mi'); + merged.textContent = children.map((c) => c.textContent ?? '').join(''); + if (variant) merged.setAttribute('mathvariant', variant); + container.insertBefore(merged, children[0]!); + for (const c of children) c.remove(); +} + /** * Convert m:func (function apply) to MathML. * @@ -59,7 +110,19 @@ export const convertFunction: MathObjectConverter = (node, doc, convertChildren) const wrapper = doc.createElementNS(MATHML_NS, 'mrow'); const functionNameRow = doc.createElementNS(MATHML_NS, 'mrow'); - functionNameRow.appendChild(convertChildren(functionName?.elements ?? [])); + // m:r children of m:fName stay whole (Word's OMML2MML.XSL keeps multi-letter + // function names like "sin" or "lim" as a single ). Non-m:r children — + // like a nested m:limLow — go through the normal recursive path. + for (const child of functionName?.elements ?? []) { + if (child.name === 'm:r') { + const atom = convertMathRunAsFunctionName(child, doc); + if (atom) functionNameRow.appendChild(atom); + } else { + const converted = convertChildren([child]); + if (converted.childNodes.length > 0) functionNameRow.appendChild(converted); + } + } + collapseFunctionNameBases(functionNameRow); forceNormalMathVariant(functionNameRow); if (functionNameRow.childNodes.length > 0) { diff --git a/packages/layout-engine/painters/dom/src/features/math/converters/math-run.ts b/packages/layout-engine/painters/dom/src/features/math/converters/math-run.ts index 79fddd2375..7f979563f9 100644 --- a/packages/layout-engine/painters/dom/src/features/math/converters/math-run.ts +++ b/packages/layout-engine/painters/dom/src/features/math/converters/math-run.ts @@ -46,8 +46,7 @@ const OPERATOR_CHARS = new Set([ '\u220C', // ∈, ∉, ∋, ∌ '\u2211', '\u220F', // ∑, ∏ - '\u221A', - '\u221E', // √, ∞ + '\u221A', // √ (radical sign — prefix operator) '\u2227', '\u2228', '\u2229', @@ -65,16 +64,70 @@ const OPERATOR_CHARS = new Set([ '\u2287', // ⊂, ⊃, ⊆, ⊇ ]); +type MathAtomTag = 'mi' | 'mo' | 'mn'; + +function isDigit(ch: string): boolean { + return ch >= '0' && ch <= '9'; +} + /** - * Classify a text string into MathML element type. - * - All-digit strings → (number) - * - Known operators → (operator) - * - Everything else → (identifier) + * Length in UTF-16 code units of the code point starting at `text[i]`. + * Handles surrogate pairs so astral-plane characters (e.g. mathematical + * italic U+1D465) don't get split into two bogus atoms. */ -function classifyMathText(text: string): 'mn' | 'mo' | 'mi' { - if (/^\d*\.?\d+$/.test(text)) return 'mn'; - if (text.length === 1 && OPERATOR_CHARS.has(text)) return 'mo'; - return 'mi'; +function codePointUnitLength(text: string, i: number): number { + const hi = text.charCodeAt(i); + if (hi >= 0xd800 && hi <= 0xdbff && i + 1 < text.length) { + const lo = text.charCodeAt(i + 1); + if (lo >= 0xdc00 && lo <= 0xdfff) return 2; + } + return 1; +} + +/** + * Split a math run's text into MathML atoms, matching Word's OMML2MML.XSL. + * + * Rules (ECMA-376 §22.1.2.116 example + Annex L.6.1.13): + * - Consecutive digits — optionally containing one decimal point between digits — + * group into a single ``. + * - Each recognized operator character becomes its own ``. + * - Every other character becomes its own ``. + * + * Example: `"n+1"` → `[n, +, 1]`. + */ +export function tokenizeMathText(text: string): Array<{ tag: MathAtomTag; content: string }> { + const atoms: Array<{ tag: MathAtomTag; content: string }> = []; + let i = 0; + while (i < text.length) { + const step = codePointUnitLength(text, i); + const ch = text.slice(i, i + step); + if (step === 1 && isDigit(ch)) { + let end = i + 1; + let sawDot = false; + while (end < text.length) { + const c = text[end]!; + if (isDigit(c)) { + end++; + continue; + } + if (c === '.' && !sawDot && end + 1 < text.length && isDigit(text[end + 1]!)) { + sawDot = true; + end++; + continue; + } + break; + } + atoms.push({ tag: 'mn', content: text.slice(i, end) }); + i = end; + } else if (step === 1 && OPERATOR_CHARS.has(ch)) { + atoms.push({ tag: 'mo', content: ch }); + i++; + } else { + atoms.push({ tag: 'mi', content: ch }); + i += step; + } + } + return atoms; } /** ECMA-376 m:sty → MathML mathvariant (§22.1.2 math run properties). */ @@ -115,47 +168,140 @@ function resolveMathVariant(rPr: OmmlJsonNode | undefined): string | null { return null; } +function extractText(node: OmmlJsonNode): string { + let text = ''; + for (const child of node.elements ?? []) { + if (child.name === 'm:t') { + for (const tc of child.elements ?? []) { + if (tc.type === 'text' && typeof tc.text === 'string') text += tc.text; + } + } + } + return text; +} + /** - * Convert an m:r (math run) element to MathML. + * Convert an m:r (math run) element to MathML atoms. * * m:r contains: * - m:rPr (math run properties: script, style, normal text flag) * - m:t (text content) * - Optionally w:rPr (WordprocessingML run properties for formatting) * - * The text is classified as , , or based on content. + * The run's text is split per-character into `` / `` / `` atoms + * per Word's OMML2MML.XSL. For a single-atom run (common case — a one-letter + * variable, single operator, or an all-digit number) the converter returns a + * single Element. For a multi-atom run (e.g. "→∞", "x+1") it returns a + * DocumentFragment whose children become siblings of the parent mrow. + * + * @spec ECMA-376 §22.1.2.116 (t) — example shows multi-char mixed runs as the + * normal authored shape; §22.1.2.58 (lit) implies operators are classified + * per-character by default. */ export const convertMathRun: MathObjectConverter = (node, doc) => { - const elements = node.elements ?? []; + const text = extractText(node); + if (!text) return null; - // Extract text from m:t children - let text = ''; - for (const child of elements) { - if (child.name === 'm:t') { - const textChildren = child.elements ?? []; - for (const tc of textChildren) { - if (tc.type === 'text' && typeof tc.text === 'string') { - text += tc.text; + const rPr = (node.elements ?? []).find((el) => el.name === 'm:rPr'); + const variant = resolveMathVariant(rPr); + const atoms = tokenizeMathText(text); + + const createAtom = (atom: { tag: MathAtomTag; content: string }): Element => { + const el = doc.createElementNS(MATHML_NS, atom.tag); + el.textContent = atom.content; + // Apply m:rPr-derived variant to every atom in the run. Omitted attribute + // means "use the MathML default" (italic for single-char , normal + // for multi-char //). + if (variant) el.setAttribute('mathvariant', variant); + return el; + }; + + if (atoms.length === 1) return createAtom(atoms[0]!); + + const fragment = doc.createDocumentFragment(); + for (const atom of atoms) fragment.appendChild(createAtom(atom)); + return fragment; +}; + +/** + * Tokenize a math run's text for the m:fName context: consecutive non-digit, + * non-operator characters stay grouped in one `` (so "log" in "log_2" + * remains a single identifier), while digits still group into `` and + * each operator character is its own ``. + * + * Matches Word's OMML2MML.XSL run-internal classification for m:fName + * content: `log_2` → `log_2`. + */ +function tokenizeFunctionNameText(text: string): Array<{ tag: MathAtomTag; content: string }> { + const atoms: Array<{ tag: MathAtomTag; content: string }> = []; + let i = 0; + while (i < text.length) { + const step = codePointUnitLength(text, i); + const ch = text.slice(i, i + step); + if (step === 1 && isDigit(ch)) { + let end = i + 1; + let sawDot = false; + while (end < text.length) { + const c = text[end]!; + if (isDigit(c)) { + end++; + continue; + } + if (c === '.' && !sawDot && end + 1 < text.length && isDigit(text[end + 1]!)) { + sawDot = true; + end++; + continue; } + break; } + atoms.push({ tag: 'mn', content: text.slice(i, end) }); + i = end; + } else if (step === 1 && OPERATOR_CHARS.has(ch)) { + atoms.push({ tag: 'mo', content: ch }); + i++; + } else { + // Group consecutive non-digit, non-operator code points into one . + let end = i + step; + while (end < text.length) { + const s = codePointUnitLength(text, end); + const c = text.slice(end, end + s); + if (s === 1 && (isDigit(c) || OPERATOR_CHARS.has(c))) break; + end += s; + } + atoms.push({ tag: 'mi', content: text.slice(i, end) }); + i = end; } } + return atoms; +} +/** + * Convert an m:r inside m:fName (m:func's function-name slot). Word's + * OMML2MML.XSL keeps each letter-sequence whole while still splitting out + * digits and operators — so `sin` stays `sin`, but `log_2` becomes + * `log_2`. + * + * Returns a single Element for single-atom runs or a DocumentFragment when + * the run emits multiple atoms. Returns null for empty text. + */ +export function convertMathRunAsFunctionName(node: OmmlJsonNode, doc: Document): Node | null { + const text = extractText(node); if (!text) return null; - const rPr = elements.find((el) => el.name === 'm:rPr'); + const rPr = (node.elements ?? []).find((el) => el.name === 'm:rPr'); const variant = resolveMathVariant(rPr); - const tag = classifyMathText(text); + const atoms = tokenizeFunctionNameText(text); - const el = doc.createElementNS(MATHML_NS, tag); - el.textContent = text; + const createAtom = (atom: { tag: MathAtomTag; content: string }): Element => { + const el = doc.createElementNS(MATHML_NS, atom.tag); + el.textContent = atom.content; + if (variant) el.setAttribute('mathvariant', variant); + return el; + }; - // Apply mathvariant when the spec properties resolve to one. The default - // for single-char is italic and for multi-char // is - // normal — we only set an attribute when m:rPr explicitly specifies it. - if (variant) { - el.setAttribute('mathvariant', variant); - } + if (atoms.length === 1) return createAtom(atoms[0]!); - return el; -}; + const fragment = doc.createDocumentFragment(); + for (const atom of atoms) fragment.appendChild(createAtom(atom)); + return fragment; +} diff --git a/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.test.ts b/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.test.ts index 010d6b9218..32930a01eb 100644 --- a/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.test.ts +++ b/packages/layout-engine/painters/dom/src/features/math/omml-to-mathml.test.ts @@ -1,6 +1,7 @@ import { describe, it, expect } from 'vitest'; import { JSDOM } from 'jsdom'; import { convertOmmlToMathml, MATHML_NS } from './omml-to-mathml.js'; +import { tokenizeMathText } from './converters/math-run.js'; const dom = new JSDOM(''); const doc = dom.window.document; @@ -343,6 +344,310 @@ describe('convertOmmlToMathml', () => { expect(children.some((c) => c.localName === 'mo')).toBe(true); // + expect(children.some((c) => c.localName === 'mn')).toBe(true); // 1 }); + + // ─── tokenizeMathText direct coverage (SD-2632) ──────────────────────────── + + it('tokenizes leading-decimal content with . as an operator followed by digits', () => { + // ".5" has no leading digit, so the "." is not part of a number. + expect(tokenizeMathText('.5')).toEqual([ + { tag: 'mo', content: '.' }, + { tag: 'mn', content: '5' }, + ]); + }); + + it('tokenizes a trailing decimal point as a separate operator', () => { + // "5." — the digit run ends at "5" because a lookahead digit is required. + expect(tokenizeMathText('5.')).toEqual([ + { tag: 'mn', content: '5' }, + { tag: 'mo', content: '.' }, + ]); + }); + + it('tokenizes "1.2.3" as number, operator, number — only first dot is inline', () => { + expect(tokenizeMathText('1.2.3')).toEqual([ + { tag: 'mn', content: '1.2' }, + { tag: 'mo', content: '.' }, + { tag: 'mn', content: '3' }, + ]); + }); + + it('tokenizes "2x+1" — number-identifier-operator-number', () => { + expect(tokenizeMathText('2x+1')).toEqual([ + { tag: 'mn', content: '2' }, + { tag: 'mi', content: 'x' }, + { tag: 'mo', content: '+' }, + { tag: 'mn', content: '1' }, + ]); + }); + + it('tokenizes consecutive operator characters as separate atoms', () => { + expect(tokenizeMathText('\u2264\u2265')).toEqual([ + { tag: 'mo', content: '\u2264' }, + { tag: 'mo', content: '\u2265' }, + ]); + }); + + it('tokenizes empty text as an empty list', () => { + expect(tokenizeMathText('')).toEqual([]); + }); + + it('tokenizes standalone ∞ as identifier, not operator (SD-2632)', () => { + // U+221E was removed from OPERATOR_CHARS; Word classifies it as . + expect(tokenizeMathText('\u221E')).toEqual([{ tag: 'mi', content: '\u221E' }]); + }); + + it('keeps astral-plane characters whole (does not split surrogate pairs)', () => { + // 𝑥 (U+1D465, mathematical italic small x) is a UTF-16 surrogate pair. + // Splitting by code unit would emit two bogus half-pair s. + const text = '\u{1D465}+1'; + expect(tokenizeMathText(text)).toEqual([ + { tag: 'mi', content: '\u{1D465}' }, + { tag: 'mo', content: '+' }, + { tag: 'mn', content: '1' }, + ]); + }); + + // ─── SD-2632: per-character split of multi-char m:r text ────────────────── + + it('splits a single m:r containing operator + identifier into + (SD-2632)', () => { + // Fixture case 1 of math-limit-tests.docx has m:r "→∞" as one run inside + // m:limLow's m:lim. Word's OMML2MML.XSL splits it to . + const omml = { + name: 'm:oMath', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: '\u2192\u221E' }] }] }], + }; + const result = convertOmmlToMathml(omml, doc); + const children = Array.from(result!.children); + expect(children.map((c) => `${c.localName}:${c.textContent}`)).toEqual(['mo:\u2192', 'mi:\u221E']); + }); + + it('splits "x+1=2" per character with digits grouped (SD-2632)', () => { + const omml = { + name: 'm:oMath', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: 'x+1=2' }] }] }], + }; + const result = convertOmmlToMathml(omml, doc); + const children = Array.from(result!.children); + expect(children.map((c) => `${c.localName}:${c.textContent}`)).toEqual(['mi:x', 'mo:+', 'mn:1', 'mo:=', 'mn:2']); + }); + + it('groups consecutive digits with an interior decimal point into one (SD-2632)', () => { + const omml = { + name: 'm:oMath', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: '123.45+67' }] }] }], + }; + const result = convertOmmlToMathml(omml, doc); + const children = Array.from(result!.children); + expect(children.map((c) => `${c.localName}:${c.textContent}`)).toEqual(['mn:123.45', 'mo:+', 'mn:67']); + }); + + it('splits m:r content inside m:sub of an m:sSub (SD-2632 F3)', () => { + // Word's built-up "b_(n+1)" has "n+1" as a single m:r inside m:sub. + // The subscript should contain separate n+1. + const omml = { + name: 'm:oMath', + elements: [ + { + name: 'm:sSub', + elements: [ + { + name: 'm:e', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: 'b' }] }] }], + }, + { + name: 'm:sub', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: 'n+1' }] }] }], + }, + ], + }, + ], + }; + const result = convertOmmlToMathml(omml, doc); + const subMrow = result!.querySelector('msub > mrow:nth-child(2)'); + expect(subMrow).not.toBeNull(); + const children = Array.from(subMrow!.children); + expect(children.map((c) => `${c.localName}:${c.textContent}`)).toEqual(['mi:n', 'mo:+', 'mn:1']); + }); + + it('preserves m:rPr mathvariant across every atom of a split run (SD-2632)', () => { + // When m:sty="b" (bold) applies to the whole run, every atom emitted + // from the split inherits it. + const omml = { + name: 'm:oMath', + elements: [ + { + name: 'm:r', + elements: [ + { name: 'm:rPr', elements: [{ name: 'm:sty', attributes: { 'm:val': 'b' } }] }, + { name: 'm:t', elements: [{ type: 'text', text: 'x+1' }] }, + ], + }, + ], + }; + const result = convertOmmlToMathml(omml, doc); + const variants = Array.from(result!.children).map((c) => c.getAttribute('mathvariant')); + expect(variants).toEqual(['bold', 'bold', 'bold']); + }); + + it('keeps "log" whole but splits operators and digits for m:fName with mixed content (SD-2632)', () => { + // Word's OMML2MML.XSL for log_2: letters group + // into one , operators and digits still split. + const omml = { + name: 'm:oMath', + elements: [ + { + name: 'm:func', + elements: [ + { + name: 'm:fName', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: 'log_2' }] }] }], + }, + { + name: 'm:e', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: 'x' }] }] }], + }, + ], + }, + ], + }; + const result = convertOmmlToMathml(omml, doc); + const fnameRow = result!.querySelector('mrow > mrow:first-child'); + const children = Array.from(fnameRow!.children); + expect(children.map((c) => `${c.localName}:${c.textContent}`)).toEqual(['mi:log', 'mo:_', 'mn:2']); + }); + + it('collapses a multi-char base inside nested m:sSub wrapped by m:fName (SD-2632)', () => { + // Ensures the msub/msup entries of BASE_BEARING_ELEMENTS are actually pinned. + // fi should + // keep "f" as a single inside the subscript wrapper's base slot. + const omml = { + name: 'm:oMath', + elements: [ + { + name: 'm:func', + elements: [ + { + name: 'm:fName', + elements: [ + { + name: 'm:sSub', + elements: [ + { + name: 'm:e', + elements: [ + { + name: 'm:r', + elements: [ + { name: 'm:rPr', elements: [{ name: 'm:sty', attributes: { 'm:val': 'p' } }] }, + { name: 'm:t', elements: [{ type: 'text', text: 'log' }] }, + ], + }, + ], + }, + { + name: 'm:sub', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: '2' }] }] }], + }, + ], + }, + ], + }, + { + name: 'm:e', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: 'x' }] }] }], + }, + ], + }, + ], + }; + const result = convertOmmlToMathml(omml, doc); + const msub = result!.querySelector('msub'); + expect(msub).not.toBeNull(); + const baseMi = msub!.children[0]!.querySelector('mi'); + expect(baseMi!.textContent).toBe('log'); + expect(baseMi!.getAttribute('mathvariant')).toBe('normal'); + }); + + it('collapses multi-char base inside nested m:sPre (mmultiscripts) wrapped by m:fName (SD-2632)', () => { + const omml = { + name: 'm:oMath', + elements: [ + { + name: 'm:func', + elements: [ + { + name: 'm:fName', + elements: [ + { + name: 'm:sPre', + elements: [ + { + name: 'm:sub', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: '2' }] }] }], + }, + { + name: 'm:sup', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: 'n' }] }] }], + }, + { + name: 'm:e', + elements: [ + { + name: 'm:r', + elements: [ + { name: 'm:rPr', elements: [{ name: 'm:sty', attributes: { 'm:val': 'p' } }] }, + { name: 'm:t', elements: [{ type: 'text', text: 'log' }] }, + ], + }, + ], + }, + ], + }, + ], + }, + { + name: 'm:e', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: 'x' }] }] }], + }, + ], + }, + ], + }; + const result = convertOmmlToMathml(omml, doc); + const mms = result!.querySelector('mmultiscripts'); + expect(mms).not.toBeNull(); + const baseMi = mms!.children[0]!.querySelector('mi'); + expect(baseMi!.textContent).toBe('log'); + expect(baseMi!.getAttribute('mathvariant')).toBe('normal'); + }); + + it('keeps multi-letter function names whole inside m:func > m:fName (SD-2632 exception)', () => { + // Word's OMML2MML.XSL keeps "sin" as one when nested in m:fName, + // even though it would otherwise per-char split a bare m:r. Exception is + // applied by convertFunction. + const omml = { + name: 'm:oMath', + elements: [ + { + name: 'm:func', + elements: [ + { + name: 'm:fName', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: 'sin' }] }] }], + }, + { + name: 'm:e', + elements: [{ name: 'm:r', elements: [{ name: 'm:t', elements: [{ type: 'text', text: 'x' }] }] }], + }, + ], + }, + ], + }; + const result = convertOmmlToMathml(omml, doc); + const functionName = result!.querySelector('mrow > mrow:first-child > mi'); + expect(functionName!.textContent).toBe('sin'); + expect(functionName!.getAttribute('mathvariant')).toBe('normal'); + }); }); describe('m:bar converter', () => { diff --git a/packages/layout-engine/painters/dom/src/features/math/types.ts b/packages/layout-engine/painters/dom/src/features/math/types.ts index f1e8d90b71..00c31ea60f 100644 --- a/packages/layout-engine/painters/dom/src/features/math/types.ts +++ b/packages/layout-engine/painters/dom/src/features/math/types.ts @@ -43,4 +43,4 @@ export type MathObjectConverter = ( node: OmmlJsonNode, doc: Document, convertChildren: (children: OmmlJsonNode[]) => DocumentFragment, -) => Element | null; +) => Node | null; diff --git a/tests/behavior/tests/importing/math-equations.spec.ts b/tests/behavior/tests/importing/math-equations.spec.ts index 647c9d8342..650c6385be 100644 --- a/tests/behavior/tests/importing/math-equations.spec.ts +++ b/tests/behavior/tests/importing/math-equations.spec.ts @@ -821,6 +821,24 @@ test.describe('m:limLow / m:limUpp (limit object) rendering', () => { }); expect(leaked).toEqual([]); }); + + test('splits multi-char operator runs in m:lim content (SD-2632)', async ({ superdoc }) => { + await superdoc.loadDocument(LIMIT_DOC); + await superdoc.waitForStable(); + + // Case 1: lim_(n→∞). Word emits the "→∞" as a single m:r. Previously we + // rendered it as one →∞; now per Word's OMML2MML.XSL it splits + // into separate atoms. Assert the full ordered sequence so a regression + // that drops or misclassifies any atom is caught. + const limExpressionAtoms = await superdoc.page.evaluate(() => { + const munders = Array.from(document.querySelectorAll('munder')); + const limMunder = munders.find((m) => m.children[0]?.querySelector('mi')?.textContent === 'lim'); + const limExpr = limMunder?.children[1]; + return Array.from(limExpr?.children ?? []).map((c) => `${c.localName}:${c.textContent}`); + }); + + expect(limExpressionAtoms).toEqual(['mi:n', 'mo:\u2192', 'mi:\u221E']); + }); }); test.describe('m:eqArr (equation array) rendering', () => { @@ -1341,9 +1359,12 @@ test.describe('m:groupChr (group character) rendering', () => { await superdoc.loadDocument(GROUPCHR_DOC); await superdoc.waitForStable(); + // Use `:scope > mo` to target the group character directly — the base + // expression may itself contain atoms (e.g. "a+b" splits to + // a+b per Word's OMML2MML.XSL). const firstMunder = await superdoc.page.evaluate(() => { const munder = document.querySelector('munder'); - const mo = munder?.querySelector('mo'); + const mo = munder?.querySelector(':scope > mo'); return mo ? { text: mo.textContent, stretchy: mo.getAttribute('stretchy') } : null; }); @@ -1359,7 +1380,7 @@ test.describe('m:groupChr (group character) rendering', () => { // Variant 2 — second munder in DOM order. const hiddenChar = await superdoc.page.evaluate(() => { const munders = document.querySelectorAll('munder'); - const mo = munders[1]?.querySelector('mo'); + const mo = munders[1]?.querySelector(':scope > mo'); return mo?.textContent; }); @@ -1372,7 +1393,7 @@ test.describe('m:groupChr (group character) rendering', () => { const chars = await superdoc.page.evaluate(() => { const wrappers = document.querySelectorAll('munder, mover'); - return Array.from(wrappers).map((w) => w.querySelector('mo')?.textContent ?? null); + return Array.from(wrappers).map((w) => w.querySelector(':scope > mo')?.textContent ?? null); }); // Variants 4 (U+23DE), 5 (U+2190), 6 (U+2192).