mirror of https://github.com/trapexit/mergerfs.git
Antonio SJ Musumeci
3 weeks ago
1 changed files with 323 additions and 0 deletions
-
323src/rapidhash.h
@ -0,0 +1,323 @@ |
|||
/* |
|||
* rapidhash - Very fast, high quality, platform-independent hashing algorithm. |
|||
* Copyright (C) 2024 Nicolas De Carli |
|||
* |
|||
* Based on 'wyhash', by Wang Yi <godspeed_china@yeah.net> |
|||
* |
|||
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) |
|||
* |
|||
* Redistribution and use in source and binary forms, with or without |
|||
* modification, are permitted provided that the following conditions are |
|||
* met: |
|||
* |
|||
* * Redistributions of source code must retain the above copyright |
|||
* notice, this list of conditions and the following disclaimer. |
|||
* * Redistributions in binary form must reproduce the above |
|||
* copyright notice, this list of conditions and the following disclaimer |
|||
* in the documentation and/or other materials provided with the |
|||
* distribution. |
|||
* |
|||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|||
* |
|||
* You can contact the author at: |
|||
* - rapidhash source repository: https://github.com/Nicoshev/rapidhash |
|||
*/ |
|||
|
|||
/*
 * Includes.
 *
 * This header defines functions and a constant array, so it may only be
 * processed once per translation unit; `#pragma once` enforces that.
 *
 * <stdint.h> supplies the fixed-width integer types, <string.h> supplies
 * memcpy and size_t. On MSVC, <intrin.h> supplies the multiply intrinsics.
 */
#pragma once

#include <stdint.h>
#include <string.h>
#if defined(_MSC_VER)
  #include <intrin.h>
  #if defined(_M_X64) && !defined(_M_ARM64EC)
    #pragma intrinsic(_umul128)
  #endif
#endif
|||
|
|||
/*
 * Language-dependent keywords.
 *
 * C builds:   RAPIDHASH_NOEXCEPT is empty, RAPIDHASH_CONSTEXPR is
 *             `static const`, RAPIDHASH_INLINE is `static inline`.
 * C++ builds: they are `noexcept`, `constexpr` and `inline` respectively.
 *
 * RAPIDHASH_INLINE can be defined before this point to be stronger than a
 * hint, i.e. by adding __attribute__((always_inline)).
 */
#if !defined(__cplusplus)
#  define RAPIDHASH_NOEXCEPT
#  define RAPIDHASH_CONSTEXPR static const
#  if !defined(RAPIDHASH_INLINE)
#    define RAPIDHASH_INLINE static inline
#  endif
#else
#  define RAPIDHASH_NOEXCEPT noexcept
#  define RAPIDHASH_CONSTEXPR constexpr
#  if !defined(RAPIDHASH_INLINE)
#    define RAPIDHASH_INLINE inline
#  endif
#endif
|||
|
|||
/*
 * Protection macro, alters behaviour of rapid_mum multiplication function.
 *
 * RAPIDHASH_FAST: Normal behavior, max speed.
 * RAPIDHASH_PROTECTED: Extra protection against entropy loss.
 *
 * RAPIDHASH_FAST is the default when neither macro is defined. A definition
 * supplied by the user (for example -DRAPIDHASH_FAST, which expands to 1) is
 * left untouched so that it is not redefined with a different body.
 * Defining both macros is rejected.
 */
#if defined(RAPIDHASH_PROTECTED) && defined(RAPIDHASH_FAST)
#  error "cannot define RAPIDHASH_PROTECTED and RAPIDHASH_FAST simultaneously."
#elif !defined(RAPIDHASH_PROTECTED) && !defined(RAPIDHASH_FAST)
#  define RAPIDHASH_FAST
#endif
|||
|
|||
/*
 * Unrolling macros, change the code generated for the main hash function.
 *
 * RAPIDHASH_COMPACT: Legacy variant, each loop iteration processes 48 bytes.
 * RAPIDHASH_UNROLLED: Unrolled variant, each loop iteration processes 96 bytes.
 *
 * Most modern CPUs should benefit from having RAPIDHASH_UNROLLED.
 *
 * These macros do not alter the output hash.
 *
 * RAPIDHASH_UNROLLED is the default when neither macro is defined. A
 * definition supplied by the user (for example -DRAPIDHASH_UNROLLED, which
 * expands to 1) is left untouched so that it is not redefined with a
 * different body. Defining both macros is rejected.
 */
#if defined(RAPIDHASH_COMPACT) && defined(RAPIDHASH_UNROLLED)
#  error "cannot define RAPIDHASH_COMPACT and RAPIDHASH_UNROLLED simultaneously."
#elif !defined(RAPIDHASH_COMPACT) && !defined(RAPIDHASH_UNROLLED)
#  define RAPIDHASH_UNROLLED
#endif
|||
|
|||
/*
 * Likely and unlikely macros.
 *
 * Branch-prediction hints for conditions. Both evaluate to 1 when `x` is
 * true and to 0 otherwise.
 *
 * The operand is normalised with `!!` before it reaches __builtin_expect,
 * whose parameters are of type `long`: without that, a pointer operand would
 * not convert and an integer wider than `long` could be truncated to zero.
 * The fallback applies the same normalisation so that every compiler sees
 * the same value.
 */
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
#  define _likely_(x)   __builtin_expect(!!(x), 1)
#  define _unlikely_(x) __builtin_expect(!!(x), 0)
#else
#  define _likely_(x)   (!!(x))
#  define _unlikely_(x) (!!(x))
#endif
|||
|
|||
/*
 * Endianness macros.
 *
 * Defines RAPIDHASH_LITTLE_ENDIAN or RAPIDHASH_BIG_ENDIAN from the
 * compiler's predefined macros. Detection is skipped when the user has
 * already defined either one, so an explicit RAPIDHASH_BIG_ENDIAN is not
 * overridden by an additional RAPIDHASH_LITTLE_ENDIAN definition.
 *
 * When the byte order cannot be determined, a warning is emitted and
 * little endian is assumed.
 */
#if !defined(RAPIDHASH_LITTLE_ENDIAN) && !defined(RAPIDHASH_BIG_ENDIAN)
#  if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#    define RAPIDHASH_LITTLE_ENDIAN
#  elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#    define RAPIDHASH_BIG_ENDIAN
#  else
#    warning "could not determine endianness! Falling back to little endian."
#    define RAPIDHASH_LITTLE_ENDIAN
#  endif
#endif
|||
|
|||
/*
 * Seed that rapidhash() passes on when the caller does not choose one.
 */
#define RAPID_SEED (0xbdd89aa982704029ULL)
|||
|
|||
/* |
|||
* Default secret parameters. |
|||
*/ |
|||
RAPIDHASH_CONSTEXPR uint64_t rapid_secret[3] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull}; |
|||
|
|||
/* |
|||
* 64*64 -> 128bit multiply function. |
|||
* |
|||
* @param A Address of 64-bit number. |
|||
* @param B Address of 64-bit number. |
|||
* |
|||
* Calculates 128-bit C = *A * *B. |
|||
* |
|||
* When RAPIDHASH_FAST is defined: |
|||
* Overwrites A contents with C's low 64 bits. |
|||
* Overwrites B contents with C's high 64 bits. |
|||
* |
|||
* When RAPIDHASH_PROTECTED is defined: |
|||
* Xors and overwrites A contents with C's low 64 bits. |
|||
* Xors and overwrites B contents with C's high 64 bits. |
|||
*/ |
|||
RAPIDHASH_INLINE void rapid_mum(uint64_t *A, uint64_t *B) RAPIDHASH_NOEXCEPT { |
|||
#if defined(__SIZEOF_INT128__) |
|||
__uint128_t r=*A; r*=*B; |
|||
#ifdef RAPIDHASH_PROTECTED |
|||
*A^=(uint64_t)r; *B^=(uint64_t)(r>>64); |
|||
#else |
|||
*A=(uint64_t)r; *B=(uint64_t)(r>>64); |
|||
#endif |
|||
#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64)) |
|||
#if defined(_M_X64) |
|||
#ifdef RAPIDHASH_PROTECTED |
|||
uint64_t a, b; |
|||
a=_umul128(*A,*B,&b); |
|||
*A^=a; *B^=b; |
|||
#else |
|||
*A=_umul128(*A,*B,B); |
|||
#endif |
|||
#else |
|||
#ifdef RAPIDHASH_PROTECTED |
|||
uint64_t a, b; |
|||
b = __umulh(*A, *B); |
|||
a = *A * *B; |
|||
*A^=a; *B^=b; |
|||
#else |
|||
uint64_t c = __umulh(*A, *B); |
|||
*A = *A * *B; |
|||
*B = c; |
|||
#endif |
|||
#endif |
|||
#else |
|||
uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo; |
|||
uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl; |
|||
lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c; |
|||
#ifdef RAPIDHASH_PROTECTED |
|||
*A^=lo; *B^=hi; |
|||
#else |
|||
*A=lo; *B=hi; |
|||
#endif |
|||
#endif |
|||
} |
|||
|
|||
/* |
|||
* Multiply and xor mix function. |
|||
* |
|||
* @param A 64-bit number. |
|||
* @param B 64-bit number. |
|||
* |
|||
* Calculates 128-bit C = A * B. |
|||
* Returns 64-bit xor between high and low 64 bits of C. |
|||
*/ |
|||
RAPIDHASH_INLINE uint64_t rapid_mix(uint64_t A, uint64_t B) RAPIDHASH_NOEXCEPT { rapid_mum(&A,&B); return A^B; } |
|||
|
|||
/* |
|||
* Read functions. |
|||
*/ |
|||
#ifdef RAPIDHASH_LITTLE_ENDIAN |
|||
RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return v;} |
|||
RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return v;} |
|||
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) |
|||
RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return __builtin_bswap64(v);} |
|||
RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return __builtin_bswap32(v);} |
|||
#elif defined(_MSC_VER) |
|||
RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return _byteswap_uint64(v);} |
|||
RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return _byteswap_ulong(v);} |
|||
#else |
|||
RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { |
|||
uint64_t v; memcpy(&v, p, 8); |
|||
return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000)); |
|||
} |
|||
RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { |
|||
uint32_t v; memcpy(&v, p, 4); |
|||
return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000)); |
|||
} |
|||
#endif |
|||
|
|||
/* |
|||
* Reads and combines 3 bytes of input. |
|||
* |
|||
* @param p Buffer to read from. |
|||
* @param k Length of @p, in bytes. |
|||
* |
|||
* Always reads and combines 3 bytes from memory. |
|||
* Guarantees to read each buffer position at least once. |
|||
* |
|||
* Returns a 64-bit value containing all three bytes read. |
|||
*/ |
|||
RAPIDHASH_INLINE uint64_t rapid_readSmall(const uint8_t *p, size_t k) RAPIDHASH_NOEXCEPT { return (((uint64_t)p[0])<<56)|(((uint64_t)p[k>>1])<<32)|p[k-1];} |
|||
|
|||
/* |
|||
* rapidhash main function. |
|||
* |
|||
* @param key Buffer to be hashed. |
|||
* @param len @key length, in bytes. |
|||
* @param seed 64-bit seed used to alter the hash result predictably. |
|||
* @param secret Triplet of 64-bit secrets used to alter hash result predictably. |
|||
* |
|||
* Returns a 64-bit hash. |
|||
*/ |
|||
RAPIDHASH_INLINE uint64_t rapidhash_internal(const void *key, size_t len, uint64_t seed, const uint64_t* secret) RAPIDHASH_NOEXCEPT { |
|||
const uint8_t *p=(const uint8_t *)key; seed^=rapid_mix(seed^secret[0],secret[1])^len; uint64_t a, b; |
|||
if(_likely_(len<=16)){ |
|||
if(_likely_(len>=4)){ |
|||
const uint8_t * plast = p + len - 4; |
|||
a = (rapid_read32(p) << 32) | rapid_read32(plast); |
|||
const uint64_t delta = ((len&24)>>(len>>3)); |
|||
b = ((rapid_read32(p + delta) << 32) | rapid_read32(plast - delta)); } |
|||
else if(_likely_(len>0)){ a=rapid_readSmall(p,len); b=0;} |
|||
else a=b=0; |
|||
} |
|||
else{ |
|||
size_t i=len; |
|||
if(_unlikely_(i>48)){ |
|||
uint64_t see1=seed, see2=seed; |
|||
#ifdef RAPIDHASH_UNROLLED |
|||
while(_likely_(i>=96)){ |
|||
seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); |
|||
see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); |
|||
see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); |
|||
seed=rapid_mix(rapid_read64(p+48)^secret[0],rapid_read64(p+56)^seed); |
|||
see1=rapid_mix(rapid_read64(p+64)^secret[1],rapid_read64(p+72)^see1); |
|||
see2=rapid_mix(rapid_read64(p+80)^secret[2],rapid_read64(p+88)^see2); |
|||
p+=96; i-=96; |
|||
} |
|||
if(_unlikely_(i>=48)){ |
|||
seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); |
|||
see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); |
|||
see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); |
|||
p+=48; i-=48; |
|||
} |
|||
#else |
|||
do { |
|||
seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); |
|||
see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); |
|||
see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); |
|||
p+=48; i-=48; |
|||
} while (_likely_(i>=48)); |
|||
#endif |
|||
seed^=see1^see2; |
|||
} |
|||
if(i>16){ |
|||
seed=rapid_mix(rapid_read64(p)^secret[2],rapid_read64(p+8)^seed^secret[1]); |
|||
if(i>32) |
|||
seed=rapid_mix(rapid_read64(p+16)^secret[2],rapid_read64(p+24)^seed); |
|||
} |
|||
a=rapid_read64(p+i-16); b=rapid_read64(p+i-8); |
|||
} |
|||
a^=secret[1]; b^=seed; rapid_mum(&a,&b); |
|||
return rapid_mix(a^secret[0]^len,b^secret[1]); |
|||
} |
|||
|
|||
/* |
|||
* rapidhash default seeded hash function. |
|||
* |
|||
* @param key Buffer to be hashed. |
|||
* @param len @key length, in bytes. |
|||
* @param seed 64-bit seed used to alter the hash result predictably. |
|||
* |
|||
* Calls rapidhash_internal using provided parameters and default secrets. |
|||
* |
|||
* Returns a 64-bit hash. |
|||
*/ |
|||
RAPIDHASH_INLINE uint64_t rapidhash_withSeed(const void *key, size_t len, uint64_t seed) RAPIDHASH_NOEXCEPT { |
|||
return rapidhash_internal(key, len, seed, rapid_secret); |
|||
} |
|||
|
|||
/* |
|||
* rapidhash default hash function. |
|||
* |
|||
* @param key Buffer to be hashed. |
|||
* @param len @key length, in bytes. |
|||
* |
|||
* Calls rapidhash_withSeed using provided parameters and the default seed. |
|||
* |
|||
* Returns a 64-bit hash. |
|||
*/ |
|||
RAPIDHASH_INLINE uint64_t rapidhash(const void *key, size_t len) RAPIDHASH_NOEXCEPT { |
|||
return rapidhash_withSeed(key, len, RAPID_SEED); |
|||
} |
Write
Preview
Loading…
Cancel
Save
Reference in new issue