|
7 | 7 | #include <algorithm> |
8 | 8 | #include <cassert> |
9 | 9 | #include <cstdlib> |
10 | | -#include <numeric> |
11 | | - |
12 | | -#if defined(_WIN32) |
13 | | - |
14 | | -#include <intrin.h> |
15 | | -#include <float.h> |
16 | | -#define Q_INTRINSIC_MUL_OVERFLOW64 |
17 | | -#define Q_UMULH(v1, v2) __umulh(v1, v2); |
18 | | -#define Q_SMULH(v1, v2) __mulh(v1, v2); |
19 | | -#pragma intrinsic(__umulh) |
20 | | -#pragma intrinsic(__mulh) |
21 | | - |
22 | | - |
23 | | -#endif |
| 10 | +#include "../JetNumeric.h" |
24 | 11 |
|
25 | 12 | struct CalculateGrowingBlockSizeResult |
26 | 13 | { |
27 | 14 | uint32_t size; |
28 | 15 | uint32_t elementCount; |
29 | 16 | }; |
30 | 17 |
|
31 | | -#if ((defined(Q_CC_INTEL) ? (Q_CC_INTEL >= 1800 && !defined(Q_OS_WIN)) : defined(Q_CC_GNU)) \ |
32 | | - && Q_CC_GNU >= 500) || __has_builtin(__builtin_add_overflow) |
33 | | - |
34 | | -template <typename T> inline |
35 | | -typename std::enable_if<std::is_unsigned<T>::value || std::is_signed<T>::value, bool>::type |
36 | | -add_overflow(T v1, T v2, T *r) |
37 | | -{ return __builtin_add_overflow(v1, v2, r); } |
38 | | - |
39 | | -template <typename T> inline |
40 | | -typename std::enable_if<std::is_unsigned<T>::value || std::is_signed<T>::value, bool>::type |
41 | | -sub_overflow(T v1, T v2, T *r) |
42 | | -{ return __builtin_sub_overflow(v1, v2, r); } |
43 | | - |
44 | | -template <typename T> inline |
45 | | -typename std::enable_if<std::is_unsigned<T>::value || std::is_signed<T>::value, bool>::type |
46 | | -mul_overflow(T v1, T v2, T *r) |
47 | | -{ return __builtin_mul_overflow(v1, v2, r); } |
48 | | - |
49 | | -#else |
50 | | - |
51 | | -template <typename T> inline typename std::enable_if<std::is_unsigned<T>::value, bool>::type |
52 | | -add_overflow(T v1, T v2, T* r) |
53 | | -{ |
54 | | - // unsigned additions are well-defined |
55 | | - *r = v1 + v2; |
56 | | - return v1 > T(v1 + v2); |
57 | | -} |
58 | | - |
59 | | -template <typename T> inline typename std::enable_if<std::is_signed<T>::value, bool>::type |
60 | | -add_overflow(T v1, T v2, T* r) |
61 | | -{ |
62 | | - // Here's how we calculate the overflow: |
63 | | - // 1) unsigned addition is well-defined, so we can always execute it |
64 | | - // 2) conversion from unsigned back to signed is implementation- |
65 | | - // defined and in the implementations we use, it's a no-op. |
66 | | - // 3) signed integer overflow happens if the sign of the two input operands |
67 | | - // is the same but the sign of the result is different. In other words, |
68 | | - // the sign of the result must be the same as the sign of either |
69 | | - // operand. |
70 | | - |
71 | | - using U = typename std::make_unsigned<T>::type; |
72 | | - *r = T(U(v1) + U(v2)); |
73 | | - |
74 | | - // If int is two's complement, assume all integer types are too. |
75 | | - if (std::is_same<int32_t, int>::value) { |
76 | | - // Two's complement equivalent (generates slightly shorter code): |
77 | | - // x ^ y is negative if x and y have different signs |
78 | | - // x & y is negative if x and y are negative |
79 | | - // (x ^ z) & (y ^ z) is negative if x and z have different signs |
80 | | - // AND y and z have different signs |
81 | | - return ((v1 ^ *r) & (v2 ^ *r)) < 0; |
82 | | - } |
83 | | - |
84 | | - bool s1 = (v1 < 0); |
85 | | - bool s2 = (v2 < 0); |
86 | | - bool sr = (*r < 0); |
87 | | - return s1 != sr && s2 != sr; |
88 | | - // also: return s1 == s2 && s1 != sr; |
89 | | -} |
90 | | - |
91 | | -template <typename T> inline typename std::enable_if<std::is_unsigned<T>::value, bool>::type |
92 | | -sub_overflow(T v1, T v2, T* r) |
93 | | -{ |
94 | | - // unsigned subtractions are well-defined |
95 | | - *r = v1 - v2; |
96 | | - return v1 < v2; |
97 | | -} |
98 | | - |
99 | | -template <typename T> inline typename std::enable_if<std::is_signed<T>::value, bool>::type |
100 | | -sub_overflow(T v1, T v2, T* r) |
101 | | -{ |
102 | | - // See above for explanation. This is the same with some signs reversed. |
103 | | - // We can't use add_overflow(v1, -v2, r) because it would be UB if |
104 | | - // v2 == std::numeric_limits<T>::min(). |
105 | | - |
106 | | - using U = typename std::make_unsigned<T>::type; |
107 | | - *r = T(U(v1) - U(v2)); |
108 | | - |
109 | | - if (std::is_same<int32_t, int>::value) |
110 | | - return ((v1 ^ *r) & (~v2 ^ *r)) < 0; |
111 | | - |
112 | | - bool s1 = (v1 < 0); |
113 | | - bool s2 = !(v2 < 0); |
114 | | - bool sr = (*r < 0); |
115 | | - return s1 != sr && s2 != sr; |
116 | | - // also: return s1 == s2 && s1 != sr; |
117 | | -} |
118 | | - |
119 | | -inline bool mul_overflow(uint64_t v1, uint64_t v2, uint64_t* r) |
120 | | -{ |
121 | | - *r = v1 * v2; |
122 | | - return Q_UMULH(v1, v2); |
123 | | -} |
124 | | - |
125 | | -inline bool mul_overflow(int64_t v1, int64_t v2, int64_t* r) |
126 | | -{ |
127 | | - // This is slightly more complex than the unsigned case above: the sign bit |
128 | | - // of 'low' must be replicated as the entire 'high', so the only valid |
129 | | - // values for 'high' are 0 and -1. Use unsigned multiply since it's the same |
130 | | - // as signed for the low bits and use a signed right shift to verify that |
131 | | - // 'high' is nothing but sign bits that match the sign of 'low'. |
132 | | - |
133 | | - int64_t high = Q_SMULH(v1, v2); |
134 | | - *r = int64_t(uint64_t(v1) * uint64_t(v2)); |
135 | | - return (*r >> 63) != high; |
136 | | -} |
137 | | - |
138 | | -#endif |
139 | | - |
140 | 18 | uint32_t qCalculateBlockSize(uint32_t elementCount, uint32_t elementSize, uint32_t headerSize) noexcept |
141 | 19 | { |
142 | 20 | J_ASSERT(elementSize); |
143 | 21 |
|
144 | | - size_t bytes; |
145 | | - if (unlikely(mul_overflow(size_t(elementSize), size_t(elementCount), &bytes)) || |
146 | | - unlikely(add_overflow(bytes, size_t(headerSize), &bytes))) |
| 22 | + uint32_t bytes; |
| 23 | + static_assert(sizeof(size_t) == 8, ""); |
| 24 | + |
| 25 | + if (unlikely(mul_overflow(uint32_t(elementSize), uint32_t(elementCount), &bytes)) || |
| 26 | + unlikely(add_overflow(bytes, uint32_t(headerSize), &bytes))) |
147 | 27 | return -1; |
148 | 28 | if (unlikely(uint32_t(bytes) < 0)) |
149 | 29 | return -1; |
150 | 30 |
|
151 | | - return uint32_t(bytes); |
| 31 | + return bytes; |
152 | 32 | } |
153 | 33 |
|
154 | 34 | constexpr inline uint64_t qNextPowerOfTwo(uint64_t v) |
|
0 commit comments