1 // Written in the D programming language.
2 
3 /**
4 
5 $(BOOKTABLE ,
6 $(TR $(TH Category) $(TH Functions))
7 $(TR $(TDNW Template API) $(TD $(MYREF XXHash)))
8 $(TR $(TDNW Helpers) $(TD $(MYREF xxhashOf)))
9 )
10 
11  * xxhash: Extremely fast non-cryptographic hash algorithm
12  *
13  * Example:
14  * -----
15  * uint hashed = xxhashOf(cast(ubyte[])"D-man is so cute");
16  * assert(hashed == 3228048616);
17  * -----
18  *
19  * See_Also:
20  *  $(LINK2 https://code.google.com/p/xxhash/, xxhash - Extremely fast non-cryptographic hash algorithm)
21  *
22  * Copyright: Copyright Masahiro Nakagawa 2014-.
23  * License:   <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
24  * Authors:   Masahiro Nakagawa
25  */
26 module xxhash;
27 
28 import std.bitmanip : swapEndian;
29 
30 /**
31  * Computes xxhash hashes of arbitrary data.
32  *
33  * Returns:
34  *  4 byte hash value.
35  */
36 @trusted pure nothrow
37 uint xxhashOf(in ubyte[] source, uint seed = 0)
38 {
39     auto srcPtr = cast(const(uint)*)source.ptr;
40     auto srcEnd = cast(const(uint)*)(source.ptr + source.length);
41     uint result = void;
42 
43     if (source.length >= 16) {
44         auto limit = srcEnd - 4;
45         uint v1 = seed + Prime32_1 + Prime32_2;
46         uint v2 = seed + Prime32_2;
47         uint v3 = seed;
48         uint v4 = seed - Prime32_1;
49 
50         do {
51             mixin(UpdateValuesRound);
52         } while (srcPtr <= limit);
53 
54         result = rotateLeft(v1, 1) + rotateLeft(v2, 7) + rotateLeft(v3, 12) + rotateLeft(v4, 18);
55     } else {
56         result = seed + Prime32_5;
57     }
58 
59     result += source.length;
60 
61     while (srcPtr <= srcEnd - 1) {
62         result += loadUint(srcPtr) * Prime32_3;
63         result = rotateLeft(result, 17) * Prime32_4;
64         srcPtr++;
65     }
66 
67     auto ptr = cast(const(ubyte)*)srcPtr;
68     auto end = cast(const(ubyte)*)srcEnd;
69 
70     mixin(FinishRound);
71 
72     return result;
73 }
74 
75 /**
76  * xxhash object implements std.digest like API for supporting streaming update.
77  *
78  * Example:
79  * -----
80  * XXHash xh;
81  *
82  * xh.start();
83  * foreach (chunk; chunks(cast(ubyte[])"D-man is so cute!", 2))
84  *     xh.put(chunk);
85  * auto hashed = xh.finish();
86  * -----
87  */
88 struct XXHash
89 {
90   private:
91     uint _seed = 0;
92     uint _v1, _v2, _v3, _v4;
93     ubyte[16] _memory;
94     size_t _memorySize;
95     ulong _totalLength;
96 
97   public:
98     @safe pure nothrow
99     {
100         /**
101          * Constructs XXHash with seed.
102          */
103         this(uint seed)
104         {
105             _seed = seed;
106         }
107 
108         /**
109          * Used to (re)initialize the XXHash.
110          */
111         void start()
112         {
113             _v1 = _seed + Prime32_1 + Prime32_2;
114             _v2 = _seed + Prime32_2;
115             _v3 = _seed;
116             _v4 = _seed - Prime32_1;
117             _memorySize = 0;
118             _totalLength = 0;
119         }
120 
121         /**
122          * Use this to feed the hash with data.
123          * Also implements the $(XREF range, OutputRange) interface for $(D ubyte) and $(D const(ubyte)[]).
124          */
125         @trusted
126         void put(scope const(ubyte)[] data...)
127         {
128             auto ptr = data.ptr;
129             auto end = ptr + data.length;
130 
131             _totalLength += data.length;
132 
133             if (_memorySize + data.length < 16) {
134                 _memory[_memorySize.._memorySize + data.length] = data;
135                 _memorySize += data.length;
136                 return;
137             }
138 
139             if (_memorySize > 0) {
140                 auto sliceSize = 16 - _memorySize;
141                 _memory[_memorySize.._memorySize + sliceSize] = data[0..sliceSize];
142 
143                 auto v1 = _v1, v2 = _v2, v3 = _v3, v4 = _v4;
144                 auto srcPtr = cast(const(uint)*)_memory.ptr;
145 
146                 mixin(UpdateValuesRound);
147 
148                 ptr += sliceSize;
149                 _memorySize = 0;
150                 _v1 = v1; _v2 = v2; _v3 = v3, _v4 = v4;
151             }
152 
153             if (ptr <= end - 16) {
154                 auto srcPtr = cast(const(uint)*)ptr;
155                 auto limit = cast(const(uint)*)(end - 16);
156                 auto v1 = _v1, v2 = _v2, v3 = _v3, v4 = _v4;
157 
158                 do {
159                     mixin(UpdateValuesRound);
160                 } while (srcPtr <= limit);
161 
162                 ptr = cast(const(ubyte)*)srcPtr;
163                 _v1 = v1; _v2 = v2; _v3 = v3, _v4 = v4;
164             }
165 
166             if (ptr < end) {
167                 auto remain = end - ptr;
168                 _memory[0..remain] = ptr[0..remain];
169                 _memorySize = remain;
170             }
171         }
172 
173         /**
174          * Returns the finished XXHash hash.
175          * This also calls $(LREF start) to reset the internal state.
176          */
177         @trusted
178         uint finish()
179         {
180             auto ptr = _memory.ptr;
181             auto end = ptr + _memorySize;
182             uint result = void;
183 
184             if (_totalLength >= 16)
185                 result = rotateLeft(_v1, 1) + rotateLeft(_v2, 7) + rotateLeft(_v3, 12) + rotateLeft(_v4, 18);
186             else
187                 result = _seed + Prime32_5;
188 
189             result += cast(uint)_totalLength;
190 
191             while (ptr <= end - 4) {
192                 result += loadUint(cast(const(uint)*)ptr) * Prime32_3;
193                 result = rotateLeft(result, 17) * Prime32_4;
194                 ptr += 4;
195             }
196 
197             mixin(FinishRound);
198 
199             start();
200 
201             return result;
202         }
203     }
204 }
205 
206 private:
207 
208 enum UpdateValuesRound = "
209     v1 += loadUint(srcPtr) * Prime32_2; v1 = rotateLeft(v1, 13);
210     v1 *= Prime32_1; srcPtr++;
211     v2 += loadUint(srcPtr) * Prime32_2; v2 = rotateLeft(v2, 13);
212     v2 *= Prime32_1; srcPtr++;
213     v3 += loadUint(srcPtr) * Prime32_2; v3 = rotateLeft(v3, 13);
214     v3 *= Prime32_1; srcPtr++;
215     v4 += loadUint(srcPtr) * Prime32_2; v4 = rotateLeft(v4, 13);
216     v4 *= Prime32_1; srcPtr++;
217 ";
218 
219 enum FinishRound = "
220     while (ptr < end) {
221         result += *ptr * Prime32_5;
222         result = rotateLeft(result, 11) * Prime32_1 ;
223         ptr++;
224     }
225 
226     result ^= result >> 15;
227     result *= Prime32_2;
228     result ^= result >> 13;
229     result *= Prime32_3;
230     result ^= result >> 16;
231 ";
232 
233 enum Prime32_1 = 2654435761U;
234 enum Prime32_2 = 2246822519U;
235 enum Prime32_3 = 3266489917U;
236 enum Prime32_4 = 668265263U;
237 enum Prime32_5 = 374761393U;
238 
239 @safe pure nothrow
240 uint rotateLeft(in uint x, in uint n)
241 {
242     return (x << n) | (x >> (32 - n));
243 }
244 
245 @safe pure nothrow
246 uint loadUint(in uint* source)
247 {
248     version (LittleEndian)
249         return *source;
250     else
251         return swapEndian(*source);
252 }
253 
254 unittest
255 {
256     import std.range : chunks;
257 
258     void runTests(ubyte[] sentence, uint seed, uint expected)
259     {
260         assert(xxhashOf(sentence, seed) == expected, "xxhashOf failed");
261 
262         XXHash xh = XXHash(seed);
263 
264         xh.start();
265         xh.put(sentence);
266         assert(xh.finish() == expected, "XXHash once failed");
267 
268         xh.start();
269         foreach (chunk; chunks(sentence, 1))
270             xh.put(chunk);
271         assert(xh.finish() == expected, "XXHash streaming failed");
272     }
273 
274     enum PRIME = 2654435761U;
275     uint random = PRIME;
276     ubyte[101] sanityBuffer;
277 
278     foreach (i; 0..sanityBuffer.length) {
279         sanityBuffer[i] = cast(ubyte)(random >> 24);
280         random *= random;
281     }
282 
283     // test cases from xxhash's bench.c
284     auto length = sanityBuffer.length;
285     runTests(sanityBuffer[0..1],      0, 0xB85CBEE5);
286     runTests(sanityBuffer[0..1],  PRIME, 0xD5845D64);
287     runTests(sanityBuffer[0..14],     0, 0xE5AA0AB4);
288     runTests(sanityBuffer[0..14], PRIME, 0x4481951D);
289     runTests(sanityBuffer,            0, 0x1F1AA412);
290     runTests(sanityBuffer,        PRIME, 0x498EC8E2);
291 }