1 // Written in the D programming language. 2 3 /** 4 5 $(BOOKTABLE , 6 $(TR $(TH Category) $(TH Functions)) 7 $(TR $(TDNW Template API) $(TD $(MYREF XXHash))) 8 $(TR $(TDNW Helpers) $(TD $(MYREF xxhashOf))) 9 ) 10 11 * xxhash: Extremely fast non-cryptographic hash algorithm 12 * 13 * Example: 14 * ----- 15 * uint hashed = xxhashOf(cast(ubyte[])"D-man is so cute"); 16 * assert(hashed == 3228048616); 17 * ----- 18 * 19 * See_Also: 20 * $(LINK2 https://code.google.com/p/xxhash/, xxhash - Extremely fast non-cryptographic hash algorithm) 21 * 22 * Copyright: Copyright Masahiro Nakagawa 2014-. 23 * License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. 24 * Authors: Masahiro Nakagawa 25 */ 26 module xxhash; 27 28 import std.bitmanip : swapEndian; 29 30 /** 31 * Computes xxhash hashes of arbitrary data. 32 * 33 * Returns: 34 * 4 byte hash value. 35 */ 36 @trusted pure nothrow 37 uint xxhashOf(in ubyte[] source, uint seed = 0) 38 { 39 auto srcPtr = cast(const(uint)*)source.ptr; 40 auto srcEnd = cast(const(uint)*)(source.ptr + source.length); 41 uint result = void; 42 43 if (source.length >= 16) { 44 auto limit = srcEnd - 4; 45 uint v1 = seed + Prime32_1 + Prime32_2; 46 uint v2 = seed + Prime32_2; 47 uint v3 = seed; 48 uint v4 = seed - Prime32_1; 49 50 do { 51 mixin(UpdateValuesRound); 52 } while (srcPtr <= limit); 53 54 result = rotateLeft(v1, 1) + rotateLeft(v2, 7) + rotateLeft(v3, 12) + rotateLeft(v4, 18); 55 } else { 56 result = seed + Prime32_5; 57 } 58 59 result += source.length; 60 61 while (srcPtr <= srcEnd - 1) { 62 result += loadUint(srcPtr) * Prime32_3; 63 result = rotateLeft(result, 17) * Prime32_4; 64 srcPtr++; 65 } 66 67 auto ptr = cast(const(ubyte)*)srcPtr; 68 auto end = cast(const(ubyte)*)srcEnd; 69 70 mixin(FinishRound); 71 72 return result; 73 } 74 75 /** 76 * xxhash object implements std.digest like API for supporting streaming update. 77 * 78 * Example: 79 * ----- 80 * XXHash xh; 81 * 82 * xh.start(); 83 * foreach (chunk; chunks(cast(ubyte[])"D-man is so cute!", 2)) 84 * xh.put(chunk); 85 * auto hashed = xh.finish(); 86 * ----- 87 */ 88 struct XXHash 89 { 90 private: 91 uint _seed = 0; 92 uint _v1, _v2, _v3, _v4; 93 ubyte[16] _memory; 94 size_t _memorySize; 95 ulong _totalLength; 96 97 public: 98 @safe pure nothrow 99 { 100 /** 101 * Constructs XXHash with seed. 102 */ 103 this(uint seed) 104 { 105 _seed = seed; 106 } 107 108 /** 109 * Used to (re)initialize the XXHash. 110 */ 111 void start() 112 { 113 _v1 = _seed + Prime32_1 + Prime32_2; 114 _v2 = _seed + Prime32_2; 115 _v3 = _seed; 116 _v4 = _seed - Prime32_1; 117 _memorySize = 0; 118 _totalLength = 0; 119 } 120 121 /** 122 * Use this to feed the hash with data. 123 * Also implements the $(XREF range, OutputRange) interface for $(D ubyte) and $(D const(ubyte)[]). 124 */ 125 @trusted 126 void put(scope const(ubyte)[] data...) 127 { 128 auto ptr = data.ptr; 129 auto end = ptr + data.length; 130 131 _totalLength += data.length; 132 133 if (_memorySize + data.length < 16) { 134 _memory[_memorySize.._memorySize + data.length] = data; 135 _memorySize += data.length; 136 return; 137 } 138 139 if (_memorySize > 0) { 140 auto sliceSize = 16 - _memorySize; 141 _memory[_memorySize.._memorySize + sliceSize] = data[0..sliceSize]; 142 143 auto v1 = _v1, v2 = _v2, v3 = _v3, v4 = _v4; 144 auto srcPtr = cast(const(uint)*)_memory.ptr; 145 146 mixin(UpdateValuesRound); 147 148 ptr += sliceSize; 149 _memorySize = 0; 150 _v1 = v1; _v2 = v2; _v3 = v3, _v4 = v4; 151 } 152 153 if (ptr <= end - 16) { 154 auto srcPtr = cast(const(uint)*)ptr; 155 auto limit = cast(const(uint)*)(end - 16); 156 auto v1 = _v1, v2 = _v2, v3 = _v3, v4 = _v4; 157 158 do { 159 mixin(UpdateValuesRound); 160 } while (srcPtr <= limit); 161 162 ptr = cast(const(ubyte)*)srcPtr; 163 _v1 = v1; _v2 = v2; _v3 = v3, _v4 = v4; 164 } 165 166 if (ptr < end) { 167 auto remain = end - ptr; 168 _memory[0..remain] = ptr[0..remain]; 169 _memorySize = remain; 170 } 171 } 172 173 /** 174 * Returns the finished XXHash hash. 175 * This also calls $(LREF start) to reset the internal state. 176 */ 177 @trusted 178 uint finish() 179 { 180 auto ptr = _memory.ptr; 181 auto end = ptr + _memorySize; 182 uint result = void; 183 184 if (_totalLength >= 16) 185 result = rotateLeft(_v1, 1) + rotateLeft(_v2, 7) + rotateLeft(_v3, 12) + rotateLeft(_v4, 18); 186 else 187 result = _seed + Prime32_5; 188 189 result += cast(uint)_totalLength; 190 191 while (ptr <= end - 4) { 192 result += loadUint(cast(const(uint)*)ptr) * Prime32_3; 193 result = rotateLeft(result, 17) * Prime32_4; 194 ptr += 4; 195 } 196 197 mixin(FinishRound); 198 199 start(); 200 201 return result; 202 } 203 } 204 } 205 206 private: 207 208 enum UpdateValuesRound = " 209 v1 += loadUint(srcPtr) * Prime32_2; v1 = rotateLeft(v1, 13); 210 v1 *= Prime32_1; srcPtr++; 211 v2 += loadUint(srcPtr) * Prime32_2; v2 = rotateLeft(v2, 13); 212 v2 *= Prime32_1; srcPtr++; 213 v3 += loadUint(srcPtr) * Prime32_2; v3 = rotateLeft(v3, 13); 214 v3 *= Prime32_1; srcPtr++; 215 v4 += loadUint(srcPtr) * Prime32_2; v4 = rotateLeft(v4, 13); 216 v4 *= Prime32_1; srcPtr++; 217 "; 218 219 enum FinishRound = " 220 while (ptr < end) { 221 result += *ptr * Prime32_5; 222 result = rotateLeft(result, 11) * Prime32_1 ; 223 ptr++; 224 } 225 226 result ^= result >> 15; 227 result *= Prime32_2; 228 result ^= result >> 13; 229 result *= Prime32_3; 230 result ^= result >> 16; 231 "; 232 233 enum Prime32_1 = 2654435761U; 234 enum Prime32_2 = 2246822519U; 235 enum Prime32_3 = 3266489917U; 236 enum Prime32_4 = 668265263U; 237 enum Prime32_5 = 374761393U; 238 239 @safe pure nothrow 240 uint rotateLeft(in uint x, in uint n) 241 { 242 return (x << n) | (x >> (32 - n)); 243 } 244 245 @safe pure nothrow 246 uint loadUint(in uint* source) 247 { 248 version (LittleEndian) 249 return *source; 250 else 251 return swapEndian(*source); 252 } 253 254 unittest 255 { 256 import std.range : chunks; 257 258 void runTests(ubyte[] sentence, uint seed, uint expected) 259 { 260 assert(xxhashOf(sentence, seed) == expected, "xxhashOf failed"); 261 262 XXHash xh = XXHash(seed); 263 264 xh.start(); 265 xh.put(sentence); 266 assert(xh.finish() == expected, "XXHash once failed"); 267 268 xh.start(); 269 foreach (chunk; chunks(sentence, 1)) 270 xh.put(chunk); 271 assert(xh.finish() == expected, "XXHash streaming failed"); 272 } 273 274 enum PRIME = 2654435761U; 275 uint random = PRIME; 276 ubyte[101] sanityBuffer; 277 278 foreach (i; 0..sanityBuffer.length) { 279 sanityBuffer[i] = cast(ubyte)(random >> 24); 280 random *= random; 281 } 282 283 // test cases from xxhash's bench.c 284 auto length = sanityBuffer.length; 285 runTests(sanityBuffer[0..1], 0, 0xB85CBEE5); 286 runTests(sanityBuffer[0..1], PRIME, 0xD5845D64); 287 runTests(sanityBuffer[0..14], 0, 0xE5AA0AB4); 288 runTests(sanityBuffer[0..14], PRIME, 0x4481951D); 289 runTests(sanityBuffer, 0, 0x1F1AA412); 290 runTests(sanityBuffer, PRIME, 0x498EC8E2); 291 }