-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathJumpHash.xs
More file actions
153 lines (131 loc) · 3.63 KB
/
JumpHash.xs
File metadata and controls
153 lines (131 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#define PERL_NO_GET_CONTEXT /* we want efficiency */
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "ppport.h"
#include "fix_inline.h"
#include <stdlib.h>
#include <sys/types.h>
/* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein.
* The authors claim it is relatively secure compared to the alternatives
* and that performance wise it is a suitable hash for languages like Perl.
* See:
*
* https://www.131002.net/siphash/
*
* This implementation seems to perform slightly slower than one-at-a-time for
* short keys, but degrades slower for longer keys. Murmur Hash outperforms it
* regardless of keys size.
*
* It is 64 bit only.
*/
/* Find best way to ROTL32/ROTL64 */
#ifndef ROTL64
#if defined(_MSC_VER)
#include <stdlib.h> /* Microsoft put _rotl declaration in here */
#define ROTL64(x,r) _rotl64(x,r)
#else
/* gcc recognises this code and generates a rotate instruction for CPUs with one */
#define ROTL64(x,r) (((uint64_t)x << r) | ((uint64_t)x >> (64 - r)))
#endif
#endif
#ifndef U8TO64_LE
#define U8TO64_LE(p) \
(((uint64_t)((p)[0]) ) | \
((uint64_t)((p)[1]) << 8) | \
((uint64_t)((p)[2]) << 16) | \
((uint64_t)((p)[3]) << 24) | \
((uint64_t)((p)[4]) << 32) | \
((uint64_t)((p)[5]) << 40) | \
((uint64_t)((p)[6]) << 48) | \
((uint64_t)((p)[7]) << 56))
#endif
#define SIPROUND \
do { \
v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \
v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \
v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \
v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \
} while(0)
/* SipHash-2-4 */
PERL_STATIC_INLINE uint64_t
siphash_2_4_from_perl(const unsigned char *in, const STRLEN inlen)
{
/* "somepseudorandomlygeneratedbytes" */
uint64_t v0 = (uint64_t)(0x736f6d6570736575);
uint64_t v1 = (uint64_t)(0x646f72616e646f6d);
uint64_t v2 = (uint64_t)(0x6c7967656e657261);
uint64_t v3 = (uint64_t)(0x7465646279746573);
uint64_t b;
uint64_t k0 = 0;
uint64_t k1 = 0;
uint64_t m;
const int left = inlen & 7;
const U8 *end = in + inlen - left;
b = ( ( uint64_t )(inlen) ) << 56;
v3 ^= k1;
v2 ^= k0;
v1 ^= k1;
v0 ^= k0;
for ( ; in != end; in += 8 )
{
m = U8TO64_LE( in );
v3 ^= m;
SIPROUND;
SIPROUND;
v0 ^= m;
}
switch( left )
{
case 7: b |= ( ( uint64_t )in[ 6] ) << 48;
case 6: b |= ( ( uint64_t )in[ 5] ) << 40;
case 5: b |= ( ( uint64_t )in[ 4] ) << 32;
case 4: b |= ( ( uint64_t )in[ 3] ) << 24;
case 3: b |= ( ( uint64_t )in[ 2] ) << 16;
case 2: b |= ( ( uint64_t )in[ 1] ) << 8;
case 1: b |= ( ( uint64_t )in[ 0] ); break;
case 0: break;
}
v3 ^= b;
SIPROUND;
SIPROUND;
v0 ^= b;
v2 ^= 0xff;
SIPROUND;
SIPROUND;
SIPROUND;
SIPROUND;
b = v0 ^ v1 ^ v2 ^ v3;
return b;
/*return (U32)(b & U32_MAX);*/
}
int32_t
JumpConsistentHash(uint64_t key, int32_t num_buckets)
{
int64_t b = 1;
int64_t j = 0;
while (j < num_buckets) {
b = j;
key = key * 2862933555777941757ULL + 1;
j = (b + 1) * ((double)(1LL << 31) / (double)((key >> 33) + 1));
}
return b;
}
MODULE = Algorithm::ConsistentHash::JumpHash PACKAGE = Algorithm::ConsistentHash::JumpHash
PROTOTYPES: DISABLE
int32_t
jumphash_numeric(uint64_t key, int32_t num_buckets)
CODE:
RETVAL = JumpConsistentHash(key, num_buckets);
OUTPUT: RETVAL
int32_t
jumphash_siphash(SV *str, uint64_t num_buckets)
CODE:
{
STRLEN len;
/* FIXME */
const char * strval = SvPVbyte(str, len);
const uint64_t hashval = siphash_2_4_from_perl(strval, len);
RETVAL = JumpConsistentHash(hashval, num_buckets);
}
OUTPUT: RETVAL