diff --git a/src/layout/UnicodeLayoutEngine.js b/src/layout/UnicodeLayoutEngine.js index 68d5b9a0..27c5ffd7 100644 --- a/src/layout/UnicodeLayoutEngine.js +++ b/src/layout/UnicodeLayoutEngine.js @@ -149,7 +149,7 @@ export default class UnicodeLayoutEngine { case 0x0e37: case 0x0e47: case 0x0e4c: - case 0x0e3d: + case 0x0e4d: case 0x0e4e: return 'Above_Right'; diff --git a/src/opentype/OTLayoutEngine.js b/src/opentype/OTLayoutEngine.js index 15cf3627..ae5a1a4f 100644 --- a/src/opentype/OTLayoutEngine.js +++ b/src/opentype/OTLayoutEngine.js @@ -38,9 +38,13 @@ export default class OTLayoutEngine { } // Choose a shaper based on the script, and setup a shaping plan. - // This determines which features to apply to which glyphs. - this.shaper = Shapers.choose(script); - this.plan = new ShapingPlan(this.font, script, glyphRun.direction); + // This determines which features to apply to which glyphs. Fall back + // to the buffer's Unicode script when neither GSUB nor GPOS picked an + // OT script — script-specific shaping (e.g. Thai SARA AM decomp, the + // PUA fallback for fonts without Thai GSUB) still applies. + let shaperScript = script || glyphRun.script; + this.shaper = Shapers.choose(shaperScript); + this.plan = new ShapingPlan(this.font, shaperScript, glyphRun.direction); this.shaper.plan(this.plan, this.glyphInfos, glyphRun.features); // Assign chosen features to output glyph run diff --git a/src/opentype/shapers/ThaiShaper.js b/src/opentype/shapers/ThaiShaper.js new file mode 100644 index 00000000..46f450a3 --- /dev/null +++ b/src/opentype/shapers/ThaiShaper.js @@ -0,0 +1,286 @@ +import DefaultShaper from './DefaultShaper'; +import GlyphInfo from '../GlyphInfo'; + +/** + * Thai / Lao shaper, ported from HarfBuzz's hb-ot-shaper-thai.cc. + * + * 1. SARA AM decomposition + NIKHAHIT reorder (always-on) + * 2. 
PUA fallback shaping for legacy fonts without Thai GSUB + * + * Step 1 is needed by every modern Thai font — without it the GSUB chain + * rules for tone-mark shifting (e.g. `uni0E49.small`) never fire because + * the buffer ends up with `[base, tone, NIKHAHIT, SARA AA]` instead of + * `[base, NIKHAHIT, tone, SARA AA]`. + * + * SARA AM (U+0E33) -> NIKHAHIT (U+0E4D) + SARA AA (U+0E32) + * Lao SARA AM (U+0EB3) -> NIKHAHIT (U+0ECD) + SARA AA (U+0EB2) + * + * The NIKHAHIT then walks backward over any above-base marks so it sits + * between the base and the existing tone-mark stack. + * + * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32> + * + * Step 2 walks an above/below state machine and remaps tone marks to PUA + * codepoints when the font ships those (older Microsoft / Apple Thai + * fonts). Modern fonts with GSUB don't need this; HarfBuzz gates it on + * the absence of Thai GSUB and so do we. + */ +export default class ThaiShaper extends DefaultShaper { + static assignFeatures(plan, glyphs) { + super.assignFeatures(plan, glyphs); + preprocessThai(glyphs, plan.font); + if (plan.script === 'thai' && !hasThaiGsub(plan.font)) { + applyThaiPuaShaping(glyphs, plan.font); + } + } +} + +// Thai SARA AM is U+0E33; Lao SARA AM is U+0EB3 — they only differ in the +// 0x80 bit so HarfBuzz uses a script-agnostic mask. We do the same. +function isSaraAm(u) { + return (u & ~0x0080) === 0x0E33; +} + +function nikhahitFromSaraAm(u) { + return u - 0x0E33 + 0x0E4D; +} + +function saraAaFromSaraAm(u) { + return u - 1; +} + +// Marks that sit above the base. The script-agnostic mask applies the +// same set for both Thai and Lao (Lao codepoints are offset by 0x80). 
+// Thai: U+0E31, U+0E34..U+0E37, U+0E47..U+0E4E, U+0E3B +// Lao: U+0EB1, U+0EB4..U+0EB7, U+0EC7..U+0ECE, U+0EBB +function isAboveBaseMark(u) { + const c = u & ~0x0080; + return c === 0x0E31 + || (c >= 0x0E34 && c <= 0x0E37) + || (c >= 0x0E47 && c <= 0x0E4E) + || c === 0x0E3B; +} + +function preprocessThai(glyphs, font) { + let i = 0; + while (i < glyphs.length) { + const u = glyphs[i].codePoints[0]; + if (!isSaraAm(u)) { + i++; + continue; + } + + // Decompose SARA AM in place into NIKHAHIT + SARA AA. Both new + // glyphs inherit the original GlyphInfo's feature flags. + const features = glyphs[i].features; + const nikhahit = makeGlyph(font, nikhahitFromSaraAm(u), features); + const saraAa = makeGlyph(font, saraAaFromSaraAm(u), features); + glyphs.splice(i, 1, nikhahit, saraAa); + + // Walk the NIKHAHIT backward over any above-base marks belonging to + // the same base. + let nikhahitIndex = i; + let target = nikhahitIndex; + while (target > 0 && isAboveBaseMark(glyphs[target - 1].codePoints[0])) { + target--; + } + if (target !== nikhahitIndex) { + const moved = glyphs.splice(nikhahitIndex, 1)[0]; + glyphs.splice(target, 0, moved); + } + + // Advance past NIKHAHIT + SARA AA. + i += 2; + } +} + +function makeGlyph(font, codePoint, features) { + const id = font.glyphForCodePoint(codePoint).id; + return new GlyphInfo(font, id, [codePoint], features); +} + +// ── PUA fallback shaping ──────────────────────────────────────────────── +// +// Walks an above-base state machine and a below-base state machine in +// parallel. Each tone/vowel mark may trigger one of the following +// actions: +// +// NOP — leave the glyph alone +// SD — shift the mark DOWN to clear a descender +// SL — shift the mark LEFT to clear another above-base mark +// SDL — shift the mark DOWN-LEFT (both) +// RD — remove the descender from the BASE consonant +// +// Each action is realised by replacing the mark (or base) codepoint with +// a private-use mapping, when the font ships that PUA glyph. 
+ +const NOP = 0; +const SD = 1; +const SL = 2; +const SDL = 3; +const RD = 4; + +// Consonant types +const NC = 0; // normal consonant +const AC = 1; // consonant with ascender (1B/1D/1F) +const RC = 2; // consonant with removable descender (0D/10) +const DC = 3; // consonant with strict descender (0E/0F) +const NOT_CONSONANT = 4; + +// Mark types +const AV = 0; // above-base vowel/mark +const BV = 1; // below-base vowel/mark +const T = 2; // tone mark +const NOT_MARK = 3; + +function getConsonantType(u) { + if (u === 0x0E1B || u === 0x0E1D || u === 0x0E1F) return AC; + if (u === 0x0E0D || u === 0x0E10) return RC; + if (u === 0x0E0E || u === 0x0E0F) return DC; + if (u >= 0x0E01 && u <= 0x0E2E) return NC; + return NOT_CONSONANT; +} + +function getMarkType(u) { + if ( + u === 0x0E31 || + (u >= 0x0E34 && u <= 0x0E37) || + u === 0x0E47 || + (u >= 0x0E4D && u <= 0x0E4E) + ) { + return AV; + } + if (u >= 0x0E38 && u <= 0x0E3A) return BV; + if (u >= 0x0E48 && u <= 0x0E4C) return T; + return NOT_MARK; +} + +// Above-base cluster state (T0..T3 = increasing stack height). +const T0 = 0, T1 = 1, T2 = 2, T3 = 3; +const ABOVE_START_STATE = [T0, T1, T0, T0, T3]; +// NC AC RC DC NOT_CONSONANT +const ABOVE_STATE_MACHINE = [ + // AV BV T + [[NOP, T3], [NOP, T0], [SD, T3]], // T0 + [[SL, T2], [NOP, T1], [SDL, T2]], // T1 + [[NOP, T3], [NOP, T2], [SL, T3]], // T2 + [[NOP, T3], [NOP, T3], [NOP, T3]] // T3 +]; + +// Below-base state (B0=none, B1=removable, B2=strict). +const B0 = 0, B1 = 1, B2 = 2; +const BELOW_START_STATE = [B0, B0, B1, B2, B2]; +// NC AC RC DC NOT_CONSONANT +const BELOW_STATE_MACHINE = [ + // AV BV T + [[NOP, B0], [NOP, B2], [NOP, B0]], // B0 + [[NOP, B1], [RD, B2], [NOP, B1]], // B1 + [[NOP, B2], [SD, B2], [NOP, B2]] // B2 +]; + +// PUA mappings (Windows and Mac private-use codepoints for shifted marks +// and descender-less base consonants). For each action we try the +// Windows PUA first, then the Mac PUA, then leave the codepoint alone. 
+const PUA_MAPPINGS = { + [SD]: [ + [0x0E48, 0xF70A, 0xF88B], // MAI EK + [0x0E49, 0xF70B, 0xF88E], // MAI THO + [0x0E4A, 0xF70C, 0xF891], // MAI TRI + [0x0E4B, 0xF70D, 0xF894], // MAI CHATTAWA + [0x0E4C, 0xF70E, 0xF897], // THANTHAKHAT + [0x0E38, 0xF718, 0xF89B], // SARA U + [0x0E39, 0xF719, 0xF89C], // SARA UU + [0x0E3A, 0xF71A, 0xF89D] // PHINTHU + ], + [SDL]: [ + [0x0E48, 0xF705, 0xF88C], // MAI EK + [0x0E49, 0xF706, 0xF88F], // MAI THO + [0x0E4A, 0xF707, 0xF892], // MAI TRI + [0x0E4B, 0xF708, 0xF895], // MAI CHATTAWA + [0x0E4C, 0xF709, 0xF898] // THANTHAKHAT + ], + [SL]: [ + [0x0E48, 0xF713, 0xF88A], // MAI EK + [0x0E49, 0xF714, 0xF88D], // MAI THO + [0x0E4A, 0xF715, 0xF890], // MAI TRI + [0x0E4B, 0xF716, 0xF893], // MAI CHATTAWA + [0x0E4C, 0xF717, 0xF896], // THANTHAKHAT + [0x0E31, 0xF710, 0xF884], // MAI HAN-AKAT + [0x0E34, 0xF701, 0xF885], // SARA I + [0x0E35, 0xF702, 0xF886], // SARA II + [0x0E36, 0xF703, 0xF887], // SARA UE + [0x0E37, 0xF704, 0xF888], // SARA UEE + [0x0E47, 0xF712, 0xF889], // MAITAIKHU + [0x0E4D, 0xF711, 0xF899] // NIKHAHIT + ], + [RD]: [ + [0x0E0D, 0xF70F, 0xF89A], // YO YING + [0x0E10, 0xF700, 0xF89E] // THO THAN + ] +}; + +function thaiPuaShape(u, action, font) { + if (action === NOP) return u; + const mappings = PUA_MAPPINGS[action]; + if (!mappings) return u; + for (const [orig, winPua, macPua] of mappings) { + if (orig !== u) continue; + if (font.hasGlyphForCodePoint(winPua)) return winPua; + if (font.hasGlyphForCodePoint(macPua)) return macPua; + break; + } + return u; +} + +function applyThaiPuaShaping(glyphs, font) { + let aboveState = ABOVE_START_STATE[NOT_CONSONANT]; + let belowState = BELOW_START_STATE[NOT_CONSONANT]; + let baseIndex = 0; + + for (let i = 0; i < glyphs.length; i++) { + const u = glyphs[i].codePoints[0]; + const mt = getMarkType(u); + + if (mt === NOT_MARK) { + const ct = getConsonantType(u); + aboveState = ABOVE_START_STATE[ct]; + belowState = BELOW_START_STATE[ct]; + baseIndex = i; + continue; + } + + const 
[aboveAction, aboveNext] = ABOVE_STATE_MACHINE[aboveState][mt]; + const [belowAction, belowNext] = BELOW_STATE_MACHINE[belowState][mt]; + aboveState = aboveNext; + belowState = belowNext; + + // At least one of the two actions is NOP; the other wins. + const action = aboveAction !== NOP ? aboveAction : belowAction; + if (action === NOP) continue; + + if (action === RD) { + const target = glyphs[baseIndex]; + const newCp = thaiPuaShape(target.codePoints[0], action, font); + if (newCp !== target.codePoints[0]) { + target.id = font.glyphForCodePoint(newCp).id; + target.codePoints = [newCp]; + } + } else { + const target = glyphs[i]; + const newCp = thaiPuaShape(u, action, font); + if (newCp !== u) { + target.id = font.glyphForCodePoint(newCp).id; + target.codePoints = [newCp]; + } + } + } +} + +// HarfBuzz gates PUA shaping on the font lacking a Thai GSUB script +// (`plan->map.found_script[0]` is false). For fontkit we check whether +// the GSUB script list contains `thai` or `tha2`. +function hasThaiGsub(font) { + const gsub = font.GSUB; + if (!gsub || !gsub.scriptList) return false; + return gsub.scriptList.some(entry => entry.tag === 'thai' || entry.tag === 'tha2'); +} diff --git a/src/opentype/shapers/index.js b/src/opentype/shapers/index.js index b22ebdd3..6f01c758 100644 --- a/src/opentype/shapers/index.js +++ b/src/opentype/shapers/index.js @@ -2,6 +2,7 @@ import DefaultShaper from './DefaultShaper'; import ArabicShaper from './ArabicShaper'; import HangulShaper from './HangulShaper'; import IndicShaper from './IndicShaper'; +import ThaiShaper from './ThaiShaper'; import UniversalShaper from './UniversalShaper'; const SHAPERS = { @@ -36,6 +37,9 @@ const SHAPERS = { tel2: IndicShaper, // Telugu khmr: IndicShaper, // Khmer + thai: ThaiShaper, // Thai + 'lao ': ThaiShaper, // Lao (4-char OT tag with trailing space) + bali: UniversalShaper, // Balinese batk: UniversalShaper, // Batak brah: UniversalShaper, // Brahmi diff --git 
a/test/data/NotoSans/NotoSansLao-Regular.LICENSE.txt b/test/data/NotoSans/NotoSansLao-Regular.LICENSE.txt new file mode 100644 index 00000000..c82d72e4 --- /dev/null +++ b/test/data/NotoSans/NotoSansLao-Regular.LICENSE.txt @@ -0,0 +1,94 @@ +Copyright 2018 The Noto Project Authors (github.com/googlei18n/noto-fonts) + +This Font Software is licensed under the SIL Open Font License, +Version 1.1. + +This license is copied below, and is also available with a FAQ at: +http://scripts.sil.org/OFL + +----------------------------------------------------------- +SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 +----------------------------------------------------------- + +PREAMBLE +The goals of the Open Font License (OFL) are to stimulate worldwide +development of collaborative font projects, to support the font +creation efforts of academic and linguistic communities, and to +provide a free and open framework in which fonts may be shared and +improved in partnership with others. + +The OFL allows the licensed fonts to be used, studied, modified and +redistributed freely as long as they are not sold by themselves. The +fonts, including any derivative works, can be bundled, embedded, +redistributed and/or sold with any software provided that any reserved +names are not used by derivative works. The fonts and derivatives, +however, cannot be released under any other type of license. The +requirement for fonts to remain under this license does not apply to +any document created using the fonts or their derivatives. + +DEFINITIONS +"Font Software" refers to the set of files released by the Copyright +Holder(s) under this license and clearly marked as such. This may +include source files, build scripts and documentation. + +"Reserved Font Name" refers to any names specified as such after the +copyright statement(s). + +"Original Version" refers to the collection of Font Software +components as distributed by the Copyright Holder(s). 
+ +"Modified Version" refers to any derivative made by adding to, +deleting, or substituting -- in part or in whole -- any of the +components of the Original Version, by changing formats or by porting +the Font Software to a new environment. + +"Author" refers to any designer, engineer, programmer, technical +writer or other person who contributed to the Font Software. + +PERMISSION & CONDITIONS +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Font Software, to use, study, copy, merge, embed, +modify, redistribute, and sell modified and unmodified copies of the +Font Software, subject to the following conditions: + +1) Neither the Font Software nor any of its individual components, in +Original or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be bundled, +redistributed and/or sold with any software, provided that each copy +contains the above copyright notice and this license. These can be +included either as stand-alone text files, human-readable headers or +in the appropriate machine-readable metadata fields within text or +binary files as long as those fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font +Name(s) unless explicit written permission is granted by the +corresponding Copyright Holder. This restriction only applies to the +primary font name as presented to the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font +Software shall not be used to promote, endorse or advertise any +Modified Version, except to acknowledge the contribution(s) of the +Copyright Holder(s) and the Author(s) or with their explicit written +permission. + +5) The Font Software, modified or unmodified, in part or in whole, +must be distributed entirely under this license, and must not be +distributed under any other license. 
The requirement for fonts to +remain under this license does not apply to any document created using +the Font Software. + +TERMINATION +This license becomes null and void if any of the above conditions are +not met. + +DISCLAIMER +THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE +COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL +DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM +OTHER DEALINGS IN THE FONT SOFTWARE. diff --git a/test/data/NotoSans/NotoSansLao-Regular.ttf b/test/data/NotoSans/NotoSansLao-Regular.ttf new file mode 100644 index 00000000..02148887 Binary files /dev/null and b/test/data/NotoSans/NotoSansLao-Regular.ttf differ diff --git a/test/data/NotoSans/NotoSansThai-Regular.LICENSE.txt b/test/data/NotoSans/NotoSansThai-Regular.LICENSE.txt new file mode 100644 index 00000000..c82d72e4 --- /dev/null +++ b/test/data/NotoSans/NotoSansThai-Regular.LICENSE.txt @@ -0,0 +1,94 @@ +Copyright 2018 The Noto Project Authors (github.com/googlei18n/noto-fonts) + +This Font Software is licensed under the SIL Open Font License, +Version 1.1. 
+ +This license is copied below, and is also available with a FAQ at: +http://scripts.sil.org/OFL + +----------------------------------------------------------- +SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 +----------------------------------------------------------- + +PREAMBLE +The goals of the Open Font License (OFL) are to stimulate worldwide +development of collaborative font projects, to support the font +creation efforts of academic and linguistic communities, and to +provide a free and open framework in which fonts may be shared and +improved in partnership with others. + +The OFL allows the licensed fonts to be used, studied, modified and +redistributed freely as long as they are not sold by themselves. The +fonts, including any derivative works, can be bundled, embedded, +redistributed and/or sold with any software provided that any reserved +names are not used by derivative works. The fonts and derivatives, +however, cannot be released under any other type of license. The +requirement for fonts to remain under this license does not apply to +any document created using the fonts or their derivatives. + +DEFINITIONS +"Font Software" refers to the set of files released by the Copyright +Holder(s) under this license and clearly marked as such. This may +include source files, build scripts and documentation. + +"Reserved Font Name" refers to any names specified as such after the +copyright statement(s). + +"Original Version" refers to the collection of Font Software +components as distributed by the Copyright Holder(s). + +"Modified Version" refers to any derivative made by adding to, +deleting, or substituting -- in part or in whole -- any of the +components of the Original Version, by changing formats or by porting +the Font Software to a new environment. + +"Author" refers to any designer, engineer, programmer, technical +writer or other person who contributed to the Font Software. 
+ +PERMISSION & CONDITIONS +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Font Software, to use, study, copy, merge, embed, +modify, redistribute, and sell modified and unmodified copies of the +Font Software, subject to the following conditions: + +1) Neither the Font Software nor any of its individual components, in +Original or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be bundled, +redistributed and/or sold with any software, provided that each copy +contains the above copyright notice and this license. These can be +included either as stand-alone text files, human-readable headers or +in the appropriate machine-readable metadata fields within text or +binary files as long as those fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font +Name(s) unless explicit written permission is granted by the +corresponding Copyright Holder. This restriction only applies to the +primary font name as presented to the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font +Software shall not be used to promote, endorse or advertise any +Modified Version, except to acknowledge the contribution(s) of the +Copyright Holder(s) and the Author(s) or with their explicit written +permission. + +5) The Font Software, modified or unmodified, in part or in whole, +must be distributed entirely under this license, and must not be +distributed under any other license. The requirement for fonts to +remain under this license does not apply to any document created using +the Font Software. + +TERMINATION +This license becomes null and void if any of the above conditions are +not met. 
+ +DISCLAIMER +THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE +COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL +DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM +OTHER DEALINGS IN THE FONT SOFTWARE. diff --git a/test/data/NotoSans/NotoSansThai-Regular.ttf b/test/data/NotoSans/NotoSansThai-Regular.ttf new file mode 100644 index 00000000..47c3fa8d Binary files /dev/null and b/test/data/NotoSans/NotoSansThai-Regular.ttf differ diff --git a/test/shaping.js b/test/shaping.js index dc005ea0..85bd1ac8 100644 --- a/test/shaping.js +++ b/test/shaping.js @@ -69,6 +69,46 @@ describe('shaping', function () { test('should adjust attached marks if base is adjusted', 'amiri/amiri-regular.ttf', 'لَكنت', '2054+1810|2133+500|2300+1206|427@-96,0+0|5988+380|2322+360'); }); + describe('thai shaper', function () { + // SARA AM (U+0E33) decomposition + NIKHAHIT reorder. Without the shaper, fontkit leaves U+0E33 unsplit and the GSUB chain rule for tone-mark shifting (uni0E49 → uni0E49.small) cannot fire. + test('should decompose SARA AM and shift tone mark', + 'NotoSans/NotoSansThai-Regular.ttf', 'น้ำ', + '71+613|59+0|49@-29,0+0|86+406'); + + test('should decompose SARA AM with mai-ek tone', + 'NotoSans/NotoSansThai-Regular.ttf', 'ก่ำ', + '29+600|59@-2,0+0|44@-31,0+0|86+406'); + + test('should decompose SARA AM after a consonant with an ascender', + 'NotoSans/NotoSansThai-Regular.ttf', 'ข่ำ', + '23+547|59@7,0+0|44@-22,0+0|86+406'); + + // Common Thai phrase exercising ccmp + mark positioning across syllables. 
+ test('should shape สวัสดี', + 'NotoSans/NotoSansThai-Regular.ttf', 'สวัสดี', + '110+572|134+492|45@10,0+0|110+572|12+616|94+0'); + + // Bare SARA AM at the start of a run — decomposes into NIKHAHIT + SARA AA with no preceding base to walk back over. Exercises the `target > 0` short-circuit. + test('should decompose a bare SARA AM', + 'NotoSans/NotoSansThai-Regular.ttf', 'ำ', + '59+0|86+406'); + + // SARA AM after only an above-base mark (no consonant base). NIKHAHIT walks back over the above mark to index 0. + test('should reorder NIKHAHIT past a leading above-mark with no base', + 'NotoSans/NotoSansThai-Regular.ttf', 'ัำ', + '59+0|45@-29,236+0|86+406'); + + // Two SARA AMs in one run — exercise the loop entering decomposition twice and re-positioning the cursor correctly between them. + test('should decompose multiple SARA AMs in one word', + 'NotoSans/NotoSansThai-Regular.ttf', 'น้ำน้ำ', + '71+613|59+0|49@-29,0+0|86+406|71+613|59+0|49@-29,0+0|86+406'); + + // Lao SARA AM (U+0EB3) — the script-agnostic mask handles both Thai and Lao. This requires the 'lao ' OT tag (with trailing space) in the shaper registry. + test('should decompose Lao SARA AM and reorder NIKHAHIT past an above-mark', + 'NotoSans/NotoSansLao-Regular.ttf', 'ຫັຳ', + '28+726|72@-28,0+0|58@-152,278+0|34+324'); + }); + describe('hangul shaper', function () { let font = fontkit.openSync(new URL('data/NotoSansCJK/NotoSansCJKkr-Regular.otf', import.meta.url));