@@ -593,8 +593,8 @@ avl_result avl_insert( dir_node_avl **in_root, dir_node_avl *in_node );
593593int avl_traverse_depth_first ( dir_node_avl * in_root , traversal_callback in_callback , void * in_context , avl_traversal_method in_method , int in_depth );
594594
595595void boyer_moore_done ();
596- char * boyer_moore_search ( char * in_text , long in_text_len );
597- int boyer_moore_init ( const char * in_pattern , long in_pat_len , long in_alphabet_size );
596+ char * boyer_moore_search ( char * in_text , size_t in_text_len );
597+ int boyer_moore_init ( const char * in_pattern , size_t in_pat_len , size_t in_alphabet_size );
598598
599599int free_dir_node_avl (dir_node_avl * in_dir_node_avl , void * in_context , int in_depth );
600600int extract_file ( int in_xiso , dir_node * in_file , modes in_mode , const char * path );
@@ -622,11 +622,11 @@ void write_sector( int in_xiso, xoff_t in_start, const char *in_name, const char
622622#endif
623623
624624
/* State shared by boyer_moore_init() and boyer_moore_search(). */
static size_t s_pat_len;				/* pattern length recorded by boyer_moore_init() */
static bool s_quiet = false;
static const char *s_pattern = NULL;	/* pattern bytes; the pointer is stored, the bytes are not copied */
static size_t *s_gs_table = NULL;		/* delta2 table, one entry per pattern position */
static size_t *s_bc_table = NULL;		/* delta1 table, one entry per alphabet symbol */

/* NOTE(review): the users of the following variables are outside this excerpt. */
static long long s_total_bytes = 0;
static int s_total_files = 0;
static char *s_copy_buffer = NULL;
@@ -1622,41 +1622,58 @@ int avl_traverse_depth_first( dir_node_avl *in_root, traversal_callback in_callb
16221622#endif
16231623
16241624
1625- int boyer_moore_init ( const char * in_pattern , long in_pat_len , long in_alphabet_size ) {
1626- long i , j , k , * backup , err = 0 ;
1625+ int boyer_moore_init ( const char * in_pattern , size_t in_pat_len , size_t in_alphabet_size ) {
1626+ size_t j , k , t , t1 , q , q1 , * aux = NULL ;
1627+ int err = 0 ;
16271628
16281629 s_pattern = in_pattern ;
16291630 s_pat_len = in_pat_len ;
1630-
1631- if ( ( s_bc_table = (long * ) malloc ( in_alphabet_size * sizeof (long ) ) ) == NULL ) mem_err ();
1632-
1633- if ( ! err ) {
1634- for ( i = 0 ; i < in_alphabet_size ; ++ i ) s_bc_table [ i ] = in_pat_len ;
1635- for ( i = 0 ; i < in_pat_len - 1 ; ++ i ) s_bc_table [ (uint8_t ) in_pattern [ i ] ] = in_pat_len - i - 1 ;
1636-
1637- if ( ( s_gs_table = (long * ) malloc ( 2 * ( in_pat_len + 1 ) * sizeof (long ) ) ) == NULL ) mem_err ();
1638- }
16391631
1640- if ( ! err ) {
1641- backup = s_gs_table + in_pat_len + 1 ;
1642-
1643- for ( i = 1 ; i <= in_pat_len ; ++ i ) s_gs_table [ i ] = 2 * in_pat_len - i ;
1644- for ( i = in_pat_len , j = in_pat_len + 1 ; i ; -- i , -- j ) {
1645- backup [ i ] = j ;
1632+ boyer_moore_done (); // Prepare for a new init
1633+
1634+ if (in_pat_len == 0 ) return 0 ;
16461635
1647- while ( j <= in_pat_len && in_pattern [ i - 1 ] != in_pattern [ j - 1 ] ) {
1648- if ( s_gs_table [ j ] > in_pat_len - i ) s_gs_table [ j ] = in_pat_len - i ;
1649- j = backup [ j ];
1636+ // Delta1 table
1637+ if ((s_bc_table = (size_t * )malloc (in_alphabet_size * sizeof (size_t ))) == NULL ) mem_err ();
1638+ if (!err ) {
1639+ for (k = 0 ; k < in_alphabet_size ; k ++ ) s_bc_table [k ] = in_pat_len ;
1640+ for (k = 0 ; k < in_pat_len ; k ++ ) s_bc_table [(unsigned char )in_pattern [k ]] = in_pat_len - 1 - k ;
1641+ }
1642+
1643+ // Delta2 table (dd' algorithm with Rytter correction)
1644+ if (!err && (s_gs_table = (size_t * )malloc (in_pat_len * sizeof (size_t ))) == NULL ) mem_err ();
1645+ if (!err && (aux = (size_t * )malloc (in_pat_len * sizeof (size_t ))) == NULL ) mem_err ();
1646+ if (!err ) {
1647+ // Step A1
1648+ for (k = 1 ; k <= in_pat_len ; k ++ ) s_gs_table [k - 1 ] = 2 * in_pat_len - k ;
1649+
1650+ // Step A2
1651+ for (j = in_pat_len , t = in_pat_len + 1 ; j > 0 ; j -- , t -- ) {
1652+ aux [j - 1 ] = t ;
1653+ while (t <= in_pat_len && in_pattern [j - 1 ] != in_pattern [t - 1 ]) {
1654+ s_gs_table [t - 1 ] = min (s_gs_table [t - 1 ], in_pat_len - j );
1655+ t = aux [t - 1 ];
16501656 }
16511657 }
1652- for ( i = 1 ; i <= j ; ++ i ) if ( s_gs_table [ i ] > in_pat_len + j - i ) s_gs_table [ i ] = in_pat_len + j - i ;
1653-
1654- k = backup [ j ];
1655-
1656- for ( ; j <= in_pat_len ; k = backup [ k ] ) {
1657- for ( ; j <= k ; ++ j ) if ( s_gs_table [ j ] >= k - j + in_pat_len ) s_gs_table [ j ] = k - j + in_pat_len ;
1658+
1659+ // Step B1
1660+ q = t ; t = in_pat_len + 1 - q ;
1661+ for (j = 1 , t1 = 0 ; j <= t ; t1 ++ , j ++ ) {
1662+ aux [j - 1 ] = t1 ;
1663+ while (t1 >= 1 && in_pattern [j - 1 ] != in_pattern [t1 - 1 ]) t1 = aux [t1 - 1 ];
1664+ }
1665+
1666+ // Step B2
1667+ q1 = 1 ;
1668+ while (q < in_pat_len ) {
1669+ for (k = q1 ; k <= q ; k ++ ) {
1670+ s_gs_table [k - 1 ] = min (s_gs_table [k - 1 ], in_pat_len + q - k );
1671+ }
1672+ q1 = q + 1 ; q = q + t - aux [t - 1 ]; t = aux [t - 1 ];
16581673 }
16591674 }
1675+
1676+ if (aux ) free (aux );
16601677
16611678 return err ;
16621679}
@@ -1668,22 +1685,20 @@ void boyer_moore_done() {
16681685}
16691686
16701687
1671- char * boyer_moore_search ( char * in_text , long in_text_len ) {
1672- long i , j , k , l ;
1688+ char * boyer_moore_search (char * in_text , size_t in_text_len ) {
1689+ size_t i , j ;
16731690
1674- for ( i = j = s_pat_len - 1 ; j < in_text_len && i >= 0 ; ) {
1675- if ( in_text [ j ] == s_pattern [ i ] ) { -- i ; -- j ; }
1676- else {
1677- k = s_gs_table [ i + 1 ];
1678- l = s_bc_table [ (uint8_t ) in_text [ j ] ];
1691+ if (s_pat_len == 0 ) return in_text ;
16791692
1680- j += max ( k , l );
1681-
1682- i = s_pat_len - 1 ;
1693+ i = s_pat_len - 1 ;
1694+ while (i < in_text_len ) {
1695+ for (j = s_pat_len - 1 ; in_text [i ] == s_pattern [j ]; -- i , -- j ) {
1696+ if (j == 0 ) return in_text + i ;
16831697 }
1698+
1699+ i += max (s_bc_table [(unsigned char )in_text [i ]], s_gs_table [j ]);
16841700 }
1685-
1686- return i < 0 ? in_text + j + 1 : NULL ;
1701+ return NULL ;
16871702}
16881703
16891704
0 commit comments