2 files changed: +18 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -321,6 +321,8 @@ defmodule StringTest do
321321 assert String . normalize ( "거̄" , :nfc ) == "거̄"
322322 assert String . normalize ( "뢴" , :nfc ) == "뢴"
323323
324+ ## Cases from NormalizationTest.txt
325+
324326 # 05B8 05B9 05B1 0591 05C3 05B0 05AC 059F
325327 # 05B1 05B8 05B9 0591 05C3 05B0 05AC 059F
326328 # HEBREW POINT QAMATS, HEBREW POINT HOLAM, HEBREW POINT HATAF SEGOL,
@@ -343,6 +345,11 @@ defmodule StringTest do
343345 # 0308 0301
344346 # COMBINING GREEK DIALYTIKA TONOS
345347 assert String . normalize ( "\u0344 " , :nfc ) == "\u0308 \u0301 "
348+
349+ # 115B9 0334 115AF
350+ # 115B9 0334 115AF
351+ # SIDDHAM VOWEL SIGN AI, COMBINING TILDE OVERLAY, SIDDHAM VOWEL SIGN AA
352+ assert String . normalize ( "𑖹̴𑖯" , :nfc ) == "𑖹̴𑖯"
346353 end
347354
348355 test "graphemes" do
Original file line number | Diff line number | Diff line change
@@ -451,6 +451,7 @@ defmodule String.Break do
451451 defp add_buffer_to_acc ( buffer , acc ) , do: [ buffer | acc ]
452452end
453453
454+
454455defmodule String.Normalizer do
455456 @ moduledoc false
456457
@@ -521,12 +522,19 @@ defmodule String.Normalizer do
521522 end
522523 end
523524 defp canonical_order ( << h :: utf8 , t :: binary >> , acc ) do
524- canonical_order ( t , [ { h , combining_class ( h ) } | acc ] )
525+ case combining_class ( h ) do
526+ 0 -> canonical_order ( acc ) <> canonical_order ( t , [ { h , 0 } ] )
527+ n -> canonical_order ( t , [ { h , n } | acc ] )
528+ end
529+ end
530+ defp canonical_order ( << >> , acc ) do
531+ canonical_order ( acc )
525532 end
526- defp canonical_order ( << >> , [ { x , _ } ] ) do
533+
534+ defp canonical_order ( [ { x , _ } ] ) do
527535 << x :: utf8 >>
528536 end
529- defp canonical_order ( << >> , acc ) do
537+ defp canonical_order ( acc ) do
530538 :lists . keysort ( 2 , Enum . reverse ( acc ) )
531539 |> Enum . map ( & << elem ( & 1 , 0 ) :: utf8 >> )
532540 |> IO . iodata_to_binary
You can’t perform that action at this time.
0 commit comments