File tree Expand file tree Collapse file tree 1 file changed +37
-0
lines changed Expand file tree Collapse file tree 1 file changed +37
-0
lines changed Original file line number Diff line number Diff line change 3232#include < unordered_set>
3333#include < vector>
3434
35+ #include < mmintrin.h>
36+ #include < xmmintrin.h>
37+
3538namespace PerfUtils {
3639
3740/* *
@@ -195,6 +198,40 @@ serialReadPmc(int ecx) {
195198 return retVal;
196199}
197200
/**
 * Prefetch the cache lines containing [object, object + numBytes) into the
 * processor's caches.
 * The best docs for this are in the Intel instruction set reference under
 * PREFETCHh.
 *
 * The region is widened downwards to a 64-byte boundary and one prefetch
 * hint is issued per 64-byte step (this assumes 64-byte cache lines).
 *
 * \param object
 *      The start of the region of memory to prefetch.
 * \param numBytes
 *      The size of the region of memory to prefetch.
 */
static inline void
prefetch(const void* object, uint64_t numBytes)
{
    // Round down using the integer value of the address. Doing the same
    // with pointer arithmetic would form a pointer before the start of the
    // object, which is undefined behavior even if it is never dereferenced.
    const uintptr_t address = reinterpret_cast<uintptr_t>(object);

    // Distance from the enclosing 64-byte boundary to the start of object.
    const uint64_t offset = address & 0x3fUL;
    const uintptr_t firstLine = address - static_cast<uintptr_t>(offset);

    // Walk from the aligned start until the whole (offset + numBytes) span
    // has been covered, issuing one hint per 64-byte step.
    for (uint64_t i = 0; i < offset + numBytes; i += 64) {
        const uintptr_t line = firstLine + static_cast<uintptr_t>(i);
        _mm_prefetch(reinterpret_cast<const char*>(line), _MM_HINT_T0);
    }
}
219+
220+ /* *
221+ * Prefetch the cache lines containing the given object into the
222+ * processor's caches.
223+ * The best docs for this are in the Intel instruction set reference under
224+ * PREFETCHh.
225+ * \param object
226+ * A pointer to the object in memory to prefetch.
227+ */
228+ template <typename T>
229+ static inline void
230+ prefetch (const T* object)
231+ {
232+ prefetch (object, sizeof (*object));
233+ }
234+
198235#define PERFUTILS_DIE (format_, ...) \
199236 do { \
200237 fprintf (stderr, format_, ##__VA_ARGS__); \
You can’t perform that action at this time.
0 commit comments