Skip to content

Commit 1de07f3

Browse files
committed
Add prefetch to PerfUtils
1 parent 5676231 commit 1de07f3

File tree

1 file changed

+37
-0
lines changed

1 file changed

+37
-0
lines changed

src/Util.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
#include <unordered_set>
3333
#include <vector>
3434

35+
#include <mmintrin.h>
36+
#include <xmmintrin.h>
37+
3538
namespace PerfUtils {
3639

3740
/**
@@ -195,6 +198,40 @@ serialReadPmc(int ecx) {
195198
return retVal;
196199
}
197200

201+
/**
 * Request that every cache line overlapping the byte range
 * [object, object + numBytes) be brought into the processor's caches.
 * One _mm_prefetch with the _MM_HINT_T0 hint is issued per line; lines are
 * treated as 64 bytes long and 64-byte aligned.
 * The best docs for this are in the Intel instruction set reference under
 * PREFETCHh.
 *
 * Note that when object is not 64-byte aligned, the line containing it is
 * still prefetched even if numBytes is 0.
 *
 * \param object
 *      The start of the region of memory to prefetch.
 * \param numBytes
 *      The size, in bytes, of the region of memory to prefetch.
 */
static inline void
prefetch(const void* object, uint64_t numBytes)
{
    // How far object sits past the start of its 64-byte line.
    const uint64_t misalignment = reinterpret_cast<uint64_t>(object) % 64;
    const char* const firstLine =
        reinterpret_cast<const char*>(object) - misalignment;

    // Bytes to cover, measured from firstLine rather than from object, so
    // that a region straddling a line boundary picks up the trailing line.
    const uint64_t span = misalignment + numBytes;

    uint64_t covered = 0;
    while (covered < span) {
        _mm_prefetch(firstLine + covered, _MM_HINT_T0);
        covered += 64;
    }
}
219+
220+
/**
221+
* Prefetch the cache lines containing the given object into the
222+
* processor's caches.
223+
* The best docs for this are in the Intel instruction set reference under
224+
* PREFETCHh.
225+
* \param object
226+
* A pointer to the object in memory to prefetch.
227+
*/
228+
template<typename T>
229+
static inline void
230+
prefetch(const T* object)
231+
{
232+
prefetch(object, sizeof(*object));
233+
}
234+
198235
#define PERFUTILS_DIE(format_, ...) \
199236
do { \
200237
fprintf(stderr, format_, ##__VA_ARGS__); \

0 commit comments

Comments
 (0)