diff --git a/embed.fnc b/embed.fnc index 8eb86869bbee..7e6ea1afe56d 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1404,7 +1404,7 @@ AMdp |UV |grok_bin |NN const char *start \ |NN I32 *flags \ |NULLOK NV *result Cp |UV |grok_bin_oct_hex \ - |NN const char *start \ + |NN const char * const start \ |NN STRLEN *len_p \ |NN I32 *flags \ |NULLOK NV *result \ diff --git a/numeric.c b/numeric.c index 351435cbc627..4ed6a1409bda 100644 --- a/numeric.c +++ b/numeric.c @@ -365,7 +365,7 @@ S_output_non_portable(pTHX_ const U8 base) } UV -Perl_grok_bin_oct_hex(pTHX_ const char *start, +Perl_grok_bin_oct_hex(pTHX_ const char * const start, STRLEN *len_p, I32 *flags, NV *result, @@ -400,7 +400,8 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, *flags = 0; const bool allow_underscores = - cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES); + cBOOL(input_flags & ( PERL_SCAN_ALLOW_UNDERSCORES + |PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES_ONLY)); const char * s = start; const char * e = start + *len_p; @@ -427,6 +428,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, /* Unroll the loop so that the first 8 digits are branchless except for the * switch. A ninth hex one overflows a 32 bit word. */ + redo_switch: switch (e - s) { default: if (UNLIKELY(! generic_isCC_(*s, class_bit))) break; @@ -473,6 +475,15 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, return value; } + /* If we get here, and the accumulated value is still 0, it is + * because there are more leading zeros than the cases of this + * switch(), These are common enough with these kinds of + * binary-style numbers that it is worth this extra conditional to + * continue absorbing them via the switch. */ + if (value == 0) { + goto redo_switch; + } + break; } @@ -486,8 +497,6 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, bool overflowed = FALSE; NV value_nv = 0; const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */ - const UV max_div= UV_MAX / base; /* Value above which, the next digit - processed would overflow */ for (; s < e; s++) { if (generic_isCC_(*s, class_bit)) { @@ -496,13 +505,20 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, With gcc seems to be much straighter code than old scan_hex. (khw suspects that adding a LIKELY() just above would do the same thing) */ - redo: - if (LIKELY(value <= max_div)) { - value = (value << shift) | XDIGIT_VALUE(*s); + redo: ; + + /* Make room for the next digit */ + UV tentative_value = value << shift; + + /* If shiftng back doesn't yield the previous value, it was + * because a bit got shifted off the left end, so overflowed. + * But if it worked, add the new digit. */ + if (LIKELY((tentative_value >> shift) == value)) { + value = tentative_value | XDIGIT_VALUE(*s); /* Note XDIGIT_VALUE() is branchless, works on binary * and octal as well, so can be used here, without * slowing those down */ - factor *= 1 << shift; + factor *= base; continue; } @@ -519,13 +535,14 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, * 'value_nv' eventually, either when all digits are gone, or we * have overflowed this fresh start. */ value = XDIGIT_VALUE(*s); - factor = 1 << shift; + factor = base; if (! overflowed) { overflowed = TRUE; - if ( ! (input_flags & PERL_SCAN_SILENT_OVERFLOW) - && ckWARN_d(WARN_OVERFLOW)) - { + if (input_flags & PERL_SCAN_SILENT_OVERFLOW) { + *flags |= PERL_SCAN_SILENT_OVERFLOW; + } + else if (ckWARN_d(WARN_OVERFLOW)) { warner(packWARN(WARN_OVERFLOW), "Integer overflow in %s number", (base == 16) ? "hexadecimal" @@ -545,11 +562,20 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, /* Don't allow a leading underscore if the only-medial bit is * set */ && ( LIKELY(s > s0) - || UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES) - != PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES))) + || UNLIKELY(! ( input_flags + & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES_ONLY)))) { ++s; - goto redo; + + /* To get here with the value so-far being 0 means we've only had + * leading zeros, then an underscore. We can continue with the + * branchless switch() instead of this loop */ + if (value == 0) { + goto redo_switch; + } + else { + goto redo; + } } if (*s) { @@ -588,10 +614,10 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start, if (LIKELY(! overflowed)) { #if UVSIZE > 4 - if ( UNLIKELY(value > 0xffffffff) - && ! (input_flags & PERL_SCAN_SILENT_NON_PORTABLE)) - { - output_non_portable(base); + if (UNLIKELY(value > 0xffffffff)) { + if (! (input_flags & PERL_SCAN_SILENT_NON_PORTABLE)) { + output_non_portable(base); + } *flags |= PERL_SCAN_SILENT_NON_PORTABLE; } #endif diff --git a/perl.h b/perl.h index f63e298a3240..8707b00ad288 100644 --- a/perl.h +++ b/perl.h @@ -8313,7 +8313,11 @@ EXTERN_C int flock(int fd, int op); /* Number scan flags. All are used for input, the ones used for output are so * marked */ -#define PERL_SCAN_ALLOW_UNDERSCORES 0x01 /* grok_??? accept _ in numbers */ + +/* grok_??? accept a stand-alone underscore initially or between digits in + * numbers */ +#define PERL_SCAN_ALLOW_UNDERSCORES 0x01 + #define PERL_SCAN_DISALLOW_PREFIX 0x02 /* grok_??? reject 0x in hex etc */ /* grok_??? input: ignored; output: found overflow */ @@ -8324,24 +8328,25 @@ EXTERN_C int flock(int fd, int op); * PERL_SCAN_NOTIFY_ILLDIGIT. */ #define PERL_SCAN_SILENT_ILLDIGIT 0x08 -#define PERL_SCAN_TRAILING 0x10 /* grok_number_flags() allow trailing - and set IS_NUMBER_TRAILING */ +/* grok_number_flags() allow trailing and set IS_NUMBER_TRAILING */ +#define PERL_SCAN_TRAILING 0x10 /* These are considered experimental, so not exposed publicly */ #if defined(PERL_CORE) || defined(PERL_EXT) /* grok_??? don't warn about very large numbers which are <= UV_MAX; * output: found such a number */ -# define PERL_SCAN_SILENT_NON_PORTABLE 0x20 +# define PERL_SCAN_SILENT_NON_PORTABLE 0x20 /* If this is set on input, and no illegal digit is found, it will be cleared * on output; otherwise unchanged */ -# define PERL_SCAN_NOTIFY_ILLDIGIT 0x40 +# define PERL_SCAN_NOTIFY_ILLDIGIT 0x40 /* Don't warn on overflow; output flag still set */ -# define PERL_SCAN_SILENT_OVERFLOW 0x80 +# define PERL_SCAN_SILENT_OVERFLOW 0x80 + +/* grok_??? accept a stand-alone underscore between digits only in numbers */ +# define PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES_ONLY 0x100 -/* Forbid a leading underscore, which the other one doesn't */ -# define PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES (0x100|PERL_SCAN_ALLOW_UNDERSCORES) #endif diff --git a/proto.h b/proto.h index 7aabf8d17f1c..acd96ff396ad 100644 --- a/proto.h +++ b/proto.h @@ -1263,7 +1263,7 @@ Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result); assert(start); assert(len_p); assert(flags) PERL_CALLCONV UV -Perl_grok_bin_oct_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result, const unsigned shift, const U8 lookup_bit, const char prefix); +Perl_grok_bin_oct_hex(pTHX_ const char * const start, STRLEN *len_p, I32 *flags, NV *result, const unsigned shift, const U8 lookup_bit, const char prefix); #define PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX \ assert(start); assert(len_p); assert(flags) diff --git a/regcomp.c b/regcomp.c index c6a2e6ee0e14..56d3bb7986b8 100644 --- a/regcomp.c +++ b/regcomp.c @@ -5352,7 +5352,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, I32 flags = PERL_SCAN_SILENT_OVERFLOW | PERL_SCAN_SILENT_ILLDIGIT | PERL_SCAN_NOTIFY_ILLDIGIT - | PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES + | PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES_ONLY | PERL_SCAN_DISALLOW_PREFIX; STRLEN len = e - RExC_parse; NV overflow_value;