@@ -490,19 +490,80 @@ get_decomp_record(PyObject *self, Py_UCS4 code,
490490#define NCount (VCount*TCount)
491491#define SCount (LCount*NCount)
492492
493+ /* Small combining runs are usually cheaper with insertion sort. */
494+ #define CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD 20
495+
496+ static void
497+ canonical_ordering_sort_insertion (int kind , void * data ,
498+ Py_ssize_t start , Py_ssize_t end )
499+ {
500+ for (Py_ssize_t i = start + 1 ; i < end ; i ++ ) {
501+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
502+ unsigned char combining = _getrecord_ex (code )-> combining ;
503+ Py_ssize_t j = i ;
504+
505+ while (j > start ) {
506+ Py_UCS4 previous = PyUnicode_READ (kind , data , j - 1 );
507+ if (_getrecord_ex (previous )-> combining <= combining ) {
508+ break ;
509+ }
510+ PyUnicode_WRITE (kind , data , j , previous );
511+ j -- ;
512+ }
513+ if (j != i ) {
514+ PyUnicode_WRITE (kind , data , j , code );
515+ }
516+ }
517+ }
518+
519+ static void
520+ canonical_ordering_sort_counting (int kind , void * data ,
521+ Py_ssize_t start , Py_ssize_t end ,
522+ Py_UCS4 * sortbuf )
523+ {
524+ Py_ssize_t counts [256 ] = {0 };
525+ Py_ssize_t run_length = end - start ;
526+ Py_ssize_t total = 0 ;
527+
528+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
529+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
530+ unsigned char combining = _getrecord_ex (code )-> combining ;
531+ counts [combining ]++ ;
532+ }
533+
534+ for (size_t i = 0 ; i < Py_ARRAY_LENGTH (counts ); i ++ ) {
535+ Py_ssize_t count = counts [i ];
536+ counts [i ] = total ;
537+ total += count ;
538+ }
539+
540+ /* Reuse counts[] as the next output slot for each CCC. */
541+ for (Py_ssize_t i = start ; i < end ; i ++ ) {
542+ Py_UCS4 code = PyUnicode_READ (kind , data , i );
543+ unsigned char combining = _getrecord_ex (code )-> combining ;
544+ sortbuf [counts [combining ]++ ] = code ;
545+ }
546+ for (Py_ssize_t i = 0 ; i < run_length ; i ++ ) {
547+ PyUnicode_WRITE (kind , data , start + i , sortbuf [i ]);
548+ }
549+ }
550+
493551static PyObject *
494552nfd_nfkd (PyObject * self , PyObject * input , int k )
495553{
496554 PyObject * result ;
497555 Py_UCS4 * output ;
498556 Py_ssize_t i , o , osize ;
499- int kind ;
500- const void * data ;
557+ int input_kind , result_kind ;
558+ const void * input_data ;
559+ void * result_data ;
501560 /* Longest decomposition in Unicode 3.2: U+FDFA */
502561 Py_UCS4 stack [20 ];
503562 Py_ssize_t space , isize ;
504563 int index , prefix , count , stackptr ;
505564 unsigned char prev , cur ;
565+ Py_UCS4 * sortbuf = NULL ;
566+ Py_ssize_t sortbuflen = 0 ;
506567
507568 stackptr = 0 ;
508569 isize = PyUnicode_GET_LENGTH (input );
@@ -522,11 +583,11 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
522583 return NULL ;
523584 }
524585 i = o = 0 ;
525- kind = PyUnicode_KIND (input );
526- data = PyUnicode_DATA (input );
586+ input_kind = PyUnicode_KIND (input );
587+ input_data = PyUnicode_DATA (input );
527588
528589 while (i < isize ) {
529- stack [stackptr ++ ] = PyUnicode_READ (kind , data , i ++ );
590+ stack [stackptr ++ ] = PyUnicode_READ (input_kind , input_data , i ++ );
530591 while (stackptr ) {
531592 Py_UCS4 code = stack [-- stackptr ];
532593 /* Hangul Decomposition adds three characters in
@@ -591,35 +652,66 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
591652 PyMem_Free (output );
592653 if (!result )
593654 return NULL ;
655+
594656 /* result is guaranteed to be ready, as it is compact. */
595- kind = PyUnicode_KIND (result );
596- data = PyUnicode_DATA (result );
657+ result_kind = PyUnicode_KIND (result );
658+ result_data = PyUnicode_DATA (result );
597659
598- /* Sort canonically. */
660+ /* Sort each consecutive combining-character run canonically. */
599661 i = 0 ;
600- prev = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
601- for (i ++ ; i < PyUnicode_GET_LENGTH (result ); i ++ ) {
602- cur = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
603- if (prev == 0 || cur == 0 || prev <= cur ) {
604- prev = cur ;
662+ while (i < o ) {
663+ Py_ssize_t run_length , run_start ;
664+ int needs_sort = 0 ;
665+
666+ Py_UCS4 ch = PyUnicode_READ (result_kind , result_data , i );
667+ prev = _getrecord_ex (ch )-> combining ;
668+ if (prev == 0 ) {
669+ i ++ ;
605670 continue ;
606671 }
607- /* Non-canonical order. Need to switch *i with previous. */
608- o = i - 1 ;
609- while (1 ) {
610- Py_UCS4 tmp = PyUnicode_READ (kind , data , o + 1 );
611- PyUnicode_WRITE (kind , data , o + 1 ,
612- PyUnicode_READ (kind , data , o ));
613- PyUnicode_WRITE (kind , data , o , tmp );
614- o -- ;
615- if (o < 0 )
616- break ;
617- prev = _getrecord_ex (PyUnicode_READ (kind , data , o ))-> combining ;
618- if (prev == 0 || prev <= cur )
672+
673+ run_start = i ++ ;
674+ while (i < o ) {
675+ Py_UCS4 ch = PyUnicode_READ (result_kind , result_data , i );
676+ cur = _getrecord_ex (ch )-> combining ;
677+ if (cur == 0 ) {
619678 break ;
679+ }
680+ if (prev > cur ) {
681+ needs_sort = 1 ;
682+ }
683+ prev = cur ;
684+ i ++ ;
685+ }
686+ if (!needs_sort ) {
687+ continue ;
688+ }
689+
690+ run_length = i - run_start ;
691+ if (run_length < CANONICAL_ORDERING_COUNTING_SORT_THRESHOLD ) {
692+ canonical_ordering_sort_insertion (result_kind , result_data ,
693+ run_start , i );
694+ continue ;
620695 }
621- prev = _getrecord_ex (PyUnicode_READ (kind , data , i ))-> combining ;
696+
697+ if (run_length > sortbuflen ) {
698+ Py_UCS4 * new_sortbuf = PyMem_Resize (sortbuf ,
699+ Py_UCS4 ,
700+ run_length );
701+ if (new_sortbuf == NULL ) {
702+ PyErr_NoMemory ();
703+ PyMem_Free (sortbuf );
704+ Py_DECREF (result );
705+ return NULL ;
706+ }
707+ sortbuf = new_sortbuf ;
708+ sortbuflen = run_length ;
709+ }
710+
711+ canonical_ordering_sort_counting (result_kind , result_data ,
712+ run_start , i , sortbuf );
622713 }
714+ PyMem_Free (sortbuf );
623715 return result ;
624716}
625717
0 commit comments