@ -1,11 +1,29 @@
/ / Author : Wang Yi < godspeed_china @ yeah . net >
# ifndef wyhash_final_version
# define wyhash_final_version
// defines that change behavior
// This is free and unencumbered software released into the public domain under The Unlicense (http://unlicense.org/)
// main repo: https://github.com/wangyi-fudan/wyhash
// author: 王一 Wang Yi <godspeed_china@yeah.net>
// contributors: Reini Urban, Dietrich Epp, Joshua Haberman, Tommy Ettinger, Daniel Lemire, Otmar Ertl, cocowalla, leo-yuriev, Diego Barrios Romero, paulie-g, dumblob, Yann Collet, ivte-ms, hyb, James Z. M. Gao, easyaspi314 (Devin), TheOneric
/* quick example:
   string s = "fjsakfdsjkf";
   uint64_t hash = wyhash(s.c_str(), s.size(), 0, _wyp);
*/
# ifndef wyhash_final_version_4
# define wyhash_final_version_4
#ifndef WYHASH_CONDOM
// Safety level; define WYHASH_CONDOM before this point to override the default.
// 0: read 8 bytes before and after boundaries, dangerous but faster
// protections that produce different results:
// 1: normal valid behavior
// 2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication"
#define WYHASH_CONDOM 1
#endif
#ifndef WYHASH_32BIT_MUM
// Define WYHASH_32BIT_MUM before this point to override the default.
// 0: normal version, slow on 32 bit systems
// 1: faster on 32 bit systems but produces different results, incompatible with wy2u0k function
#define WYHASH_32BIT_MUM 0
#endif
/ / includes
# include <stdint.h>
# include <string.h>
@ -13,6 +31,8 @@
# include <intrin.h>
# pragma intrinsic(_umul128)
# endif
/ / likely and unlikely macros
# if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
# define _likely_(x) __builtin_expect(x,1)
# define _unlikely_(x) __builtin_expect(x,0)
@ -20,11 +40,12 @@
# define _likely_(x) (x)
# define _unlikely_(x) (x)
# endif
/ / mum function
/ / 128 bit multiply function
// Swap the upper and lower 32-bit halves of x (i.e. rotate x by 32 bits).
static inline uint64_t _wyrot(uint64_t x) { return (x >> 32) | (x << 32); }
static inline void _wymum ( uint64_t * A , uint64_t * B ) {
# if(WYHASH_32BIT_MUM)
uint64_t hh = ( * A > > 32 ) * ( * B > > 32 ) , hl = ( * A > > 32 ) * ( unsigned ) * B , lh = ( unsigned ) * A * ( * B > > 32 ) , ll = ( uint64_t ) ( unsigned ) * A * ( unsigned ) * B ;
uint64_t hh = ( * A > > 32 ) * ( * B > > 32 ) , hl = ( * A > > 32 ) * ( uint32_t ) * B , lh = ( uint32_t ) * A * ( * B > > 32 ) , ll = ( uint64_t ) ( uint32_t ) * A * ( uint32_t ) * B ;
# if(WYHASH_CONDOM>1)
* A ^ = _wyrot ( hl ) ^ hh ; * B ^ = _wyrot ( lh ) ^ ll ;
# else
@ -56,85 +77,155 @@ static inline void _wymum(uint64_t *A, uint64_t *B){
# endif
# endif
}
// multiply and xor mix function, aka MUM:
// runs _wymum on local copies of A and B and returns the XOR of the two results.
static inline uint64_t _wymix(uint64_t A, uint64_t B) {
  _wymum(&A, &B);
  return A ^ B;
}
/ / read functions
/ / endian macros
#ifndef WYHASH_LITTLE_ENDIAN
  // Auto-detect the byte order unless WYHASH_LITTLE_ENDIAN was already defined.
  // Little-endian indicators are tested first, then big-endian ones.
  #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
    #define WYHASH_LITTLE_ENDIAN 1
  #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    #define WYHASH_LITTLE_ENDIAN 0
  #else
    // Nothing recognised: warn at compile time and assume little endian.
    #warning could not determine endianness! Falling back to little endian.
    #define WYHASH_LITTLE_ENDIAN 1
  #endif
#endif
/ / read functions
# if (WYHASH_LITTLE_ENDIAN)
// Load 8 bytes starting at p in host byte order; memcpy is used so p needs no alignment.
static inline uint64_t _wyr8(const uint8_t *p) {
  uint64_t word;
  memcpy(&word, p, sizeof word);
  return word;
}
// Load 4 bytes starting at p in host byte order and zero-extend them to 64 bits.
// The temporary is a uint32_t so that the 4-byte memcpy fills an object of exactly 4 bytes.
static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v; }
# elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
// Load 8 bytes starting at p, then reverse their order with the compiler's bswap builtin.
static inline uint64_t _wyr8(const uint8_t *p) {
  uint64_t word;
  memcpy(&word, p, sizeof word);
  return __builtin_bswap64(word);
}
// Load 4 bytes starting at p, reverse their order with the compiler's bswap builtin,
// and zero-extend the result to 64 bits.
// The temporary is a uint32_t so that the 4-byte memcpy fills an object of exactly 4 bytes.
static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return __builtin_bswap32(v); }
# elif defined(_MSC_VER)
// Variant selected when WYHASH_LITTLE_ENDIAN is 0 and _MSC_VER is defined:
// copies 8 bytes from p and returns them byte-reversed via _byteswap_uint64.
static inline uint64_t _wyr8 ( const uint8_t * p ) { uint64_t v ; memcpy ( & v , p , 8 ) ; return _byteswap_uint64 ( v ) ; }
static inline uint64_t _wyr4 ( const uint8_t * p ) { unsigned v ; memcpy ( & v , p , 4 ) ; return _byteswap_ulong ( v ) ; }
# endif
static inline uint64_t _wyr3 ( const uint8_t * p , unsigned k ) { return ( ( ( uint64_t ) p [ 0 ] ) < < 16 ) | ( ( ( uint64_t ) p [ k > > 1 ] ) < < 8 ) | p [ k - 1 ] ; }
/ / wyhash function
static inline uint64_t _wyfinish16 ( const uint8_t * p , uint64_t len , uint64_t seed , const uint64_t * secret , uint64_t i ) {
# if(WYHASH_CONDOM>0)
uint64_t a , b ;
if ( _likely_ ( i < = 8 ) ) {
if ( _likely_ ( i > = 4 ) ) { a = _wyr4 ( p ) ; b = _wyr4 ( p + i - 4 ) ; }
else if ( _likely_ ( i ) ) { a = _wyr3 ( p , i ) ; b = 0 ; }
else a = b = 0 ;
}
else { a = _wyr8 ( p ) ; b = _wyr8 ( p + i - 8 ) ; }
return _wymix ( secret [ 1 ] ^ len , _wymix ( a ^ secret [ 1 ] , b ^ seed ) ) ;
// Copies 4 bytes from p into a uint32_t and returns them byte-reversed via
// _byteswap_ulong, widened to uint64_t.
// NOTE(review): _byteswap_ulong is presumably the MSVC intrinsic, so this likely belongs to the _MSC_VER branch — confirm.
static inline uint64_t _wyr4 ( const uint8_t * p ) { uint32_t v ; memcpy ( & v , p , 4 ) ; return _byteswap_ulong ( v ) ; }
# else
# define oneshot_shift ((i<8)*((8-i)<<3))
return _wymix ( secret [ 1 ] ^ len , _wymix ( ( _wyr8 ( p ) < < oneshot_shift ) ^ secret [ 1 ] , ( _wyr8 ( p + i - 8 ) > > oneshot_shift ) ^ seed ) ) ;
# endif
// Byte-reversing 8-byte load written with plain shifts and masks (no compiler intrinsic):
// byte 0 of the loaded word swaps with byte 7, byte 1 with byte 6, and so on.
static inline uint64_t _wyr8(const uint8_t *p) {
  uint64_t v; memcpy(&v, p, 8);
  return (((v >> 56) & 0xff) | ((v >> 40) & 0xff00) | ((v >> 24) & 0xff0000) | ((v >> 8) & 0xff000000) | ((v << 8) & 0xff00000000) | ((v << 24) & 0xff0000000000) | ((v << 40) & 0xff000000000000) | ((v << 56) & 0xff00000000000000));
}
static inline uint64_t _wyfinish ( const uint8_t * p , uint64_t len , uint64_t seed , const uint64_t * secret , uint64_t i ) {
if ( _likely_ ( i < = 16 ) ) return _wyfinish16 ( p , len , seed , secret , i ) ;
return _wyfinish ( p + 16 , len , _wymix ( _wyr8 ( p ) ^ secret [ 1 ] , _wyr8 ( p + 8 ) ^ seed ) , secret , i - 16 ) ;
// Byte-reversing 4-byte load written with plain shifts and masks (no compiler intrinsic);
// the reversed 32-bit value is zero-extended to 64 bits on return.
static inline uint64_t _wyr4(const uint8_t *p) {
  uint32_t v; memcpy(&v, p, 4);
  return (((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000));
}
static inline uint64_t wyhash ( const void * key , uint64_t len , uint64_t seed , const uint64_t * secret ) {
const uint8_t * p = ( const uint8_t * ) key ;
uint64_t i = len ; seed ^ = * secret ;
if ( _unlikely_ ( i > 64 ) ) {
uint64_t see1 = seed ;
# endif
// Pack bytes of a short input of k bytes into one value:
// p[0] lands in bits 16-23, p[k>>1] in bits 8-15 and p[k-1] in bits 0-7.
// k must be at least 1 because p[k-1] is read; wyhash calls this for lengths 1..3.
static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t)p[0]) << 16) | (((uint64_t)p[k >> 1]) << 8) | p[k - 1]; }
// wyhash main function
// Hashes the len bytes at key and returns a 64-bit value.
//   key    : input bytes (read only)
//   len    : number of input bytes; 0 is allowed
//   seed   : caller-chosen seed mixed into the result
//   secret : secret parameters; elements 0..3 are read
static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret) {
  const uint8_t *p = (const uint8_t *)key;
  seed ^= _wymix(seed ^ secret[0], secret[1]);
  uint64_t a, b;
  if (_likely_(len <= 16)) {
    if (_likely_(len >= 4)) {
      // a is built from 4-byte reads at the front, b from 4-byte reads at the back.
      // The second offset (len>>3)<<2 is 0, 4 or 8, so the reads together cover
      // every byte for any length in 4..16.
      a = (_wyr4(p) << 32) | _wyr4(p + ((len >> 3) << 2));
      b = (_wyr4(p + len - 4) << 32) | _wyr4(p + len - 4 - ((len >> 3) << 2));
    }
    else if (_likely_(len > 0)) { a = _wyr3(p, len); b = 0; }
    else a = b = 0;
  }
  else {
    size_t i = len;  // bytes not yet consumed
    if (_unlikely_(i > 48)) {
      // Consume 48 bytes per round in three independent lanes, then fold the lanes together.
      uint64_t see1 = seed, see2 = seed;
      do {
        seed = _wymix(_wyr8(p) ^ secret[1], _wyr8(p + 8) ^ seed);
        see1 = _wymix(_wyr8(p + 16) ^ secret[2], _wyr8(p + 24) ^ see1);
        see2 = _wymix(_wyr8(p + 32) ^ secret[3], _wyr8(p + 40) ^ see2);
        p += 48; i -= 48;
      } while (_likely_(i > 48));
      seed ^= see1 ^ see2;
    }
    while (_unlikely_(i > 16)) { seed = _wymix(_wyr8(p) ^ secret[1], _wyr8(p + 8) ^ seed); i -= 16; p += 16; }
    // Final 16 bytes of the input. p+i is the end of the input, so these reads may
    // overlap bytes already consumed; they stay in bounds because len > 16 here.
    a = _wyr8(p + i - 16); b = _wyr8(p + i - 8);
  }
  a ^= secret[1]; b ^= seed; _wymum(&a, &b);
  return _wymix(a ^ secret[0] ^ len, b ^ secret[1]);
}
/ / utility functions
// the default secret parameters
// Declared static so each translation unit that includes this file gets its own copy.
static const uint64_t _wyp[4] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull};
// a useful 64bit-64bit mix function to produce deterministic pseudo random numbers that can pass BigCrush and PractRand
// Both inputs are XORed with fixed constants, multiplied through _wymum, then XORed
// with the same constants again and combined by _wymix.
static inline uint64_t wyhash64(uint64_t A, uint64_t B) {
  A ^= 0xa0761d6478bd642full; B ^= 0xe7037ed1a0b428dbull;
  _wymum(&A, &B);
  return _wymix(A ^ 0xa0761d6478bd642full, B ^ 0xe7037ed1a0b428dbull);
}
// The wyrand PRNG that passes BigCrush and PractRand
// Advances *seed by a fixed constant (the state is updated in place) and returns
// a mix of the new state.
static inline uint64_t wyrand(uint64_t *seed) {
  *seed += 0xa0761d6478bd642full;
  return _wymix(*seed, *seed ^ 0xe7037ed1a0b428dbull);
}
// convert any 64 bit pseudo random numbers to uniform distribution [0,1). It can be combined with wyrand, wyhash64 or wyhash.
// Keeps the top 52 bits of r and scales them by 2^-52.
static inline double wy2u01(uint64_t r) { const double _wynorm = 1.0 / (1ull << 52); return (r >> 12) * _wynorm; }
// convert any 64 bit pseudo random numbers to APPROXIMATE Gaussian distribution. It can be combined with wyrand, wyhash64 or wyhash.
// Sums three 21-bit fields of r (bits 0-20, 21-41, 42-62), scales the sum by 2^-20
// and subtracts 3.0, so the result lies in [-3, 3).
static inline double wy2gau(uint64_t r) { const double _wynorm = 1.0 / (1ull << 20); return ((r & 0x1fffff) + ((r >> 21) & 0x1fffff) + ((r >> 42) & 0x1fffff)) * _wynorm - 3.0; }
#ifdef WYTRNG
#include <sys/time.h>
// The wytrand true random number generator, passed BigCrush.
// Only compiled when WYTRNG is defined. Packs the current gettimeofday() reading
// (seconds in the upper 32 bits, OR-ed with microseconds), mixes it with *seed,
// writes the new state back to *seed and returns a mix of that state.
static inline uint64_t wytrand(uint64_t *seed) {
  struct timeval t; gettimeofday(&t, 0);
  uint64_t teed = (((uint64_t)t.tv_sec) << 32) | t.tv_usec;
  teed = _wymix(teed ^ _wyp[0], *seed ^ _wyp[1]);
  *seed = _wymix(teed ^ _wyp[0], _wyp[2]);
  return _wymix(*seed, *seed ^ _wyp[3]);
}
#endif
#if(!WYHASH_32BIT_MUM)
// fast range integer random number generation on [0,k) credit to Daniel Lemire. May not work when WYHASH_32BIT_MUM=1. It can be combined with wyrand, wyhash64 or wyhash.
// Passes r and k through _wymum and returns the value left in k.
static inline uint64_t wy2u0k(uint64_t r, uint64_t k) {
  _wymum(&r, &k);
  return k;
}
#endif
/ / make your own secret
// Fills secret[] with generated 64-bit values derived from seed.
// Each candidate is assembled from 8 bytes picked from table c with wyrand(&seed),
// and is redrawn until it passes the checks inside the do/while loop below.
// NOTE(review): this function contains two consecutive outer loop headers and two
// consecutive inner loop headers; it looks like two revisions were merged. Confirm
// which of each pair is intended before relying on this code.
static inline void make_secret ( uint64_t seed , uint64_t * secret ) {
// Candidate byte values. NOTE(review): these appear to be the bytes with exactly four bits set — confirm.
uint8_t c [ ] = { 15 , 23 , 27 , 29 , 30 , 39 , 43 , 45 , 46 , 51 , 53 , 54 , 57 , 58 , 60 , 71 , 75 , 77 , 78 , 83 , 85 , 86 , 89 , 90 , 92 , 99 , 101 , 102 , 105 , 106 , 108 , 113 , 114 , 116 , 120 , 135 , 139 , 141 , 142 , 147 , 149 , 150 , 153 , 154 , 156 , 163 , 165 , 166 , 169 , 170 , 172 , 177 , 178 , 180 , 184 , 195 , 197 , 198 , 201 , 202 , 204 , 209 , 210 , 212 , 216 , 225 , 226 , 228 , 232 , 240 } ;
// NOTE(review): two outer loop headers with bounds 5 and 4; the second shadows i.
// The caller's array must hold as many elements as whichever bound is kept.
for ( size_t i = 0 ; i < 5 ; i + + ) {
for ( size_t i = 0 ; i < 4 ; i + + ) {
uint8_t ok ;
do {
ok = 1 ; secret [ i ] = 0 ;
// Build the candidate one byte at a time: a random entry of c is placed at bit offset j.
for ( size_t j = 0 ; j < 64 ; j + = 8 ) secret [ i ] | = ( ( uint64_t ) c [ wyrand ( & seed ) % sizeof ( c ) ] ) < < j ;
// Even candidates are rejected; continue jumps to the while(!ok) test, so a new one is drawn.
if ( secret [ i ] % 2 = = 0 ) { ok = 0 ; continue ; }
// Compare against every earlier secret: the XOR must have exactly 32 bits set.
// NOTE(review): duplicated loop header, one without and one with an opening brace.
for ( size_t j = 0 ; j < i ; j + + )
for ( size_t j = 0 ; j < i ; j + + ) {
# if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
if ( __builtin_popcountll ( secret [ j ] ^ secret [ i ] ) ! = 32 ) { ok = 0 ; break ; }
# elif defined(_MSC_VER) && defined(_M_X64)
if ( _mm_popcnt_u64 ( secret [ j ] ^ secret [ i ] ) ! = 32 ) { ok = 0 ; break ; }
# else
/ / manual popcount
// Bit-parallel population count of x: pairwise sums, then nibble sums, then byte
// sums; the final multiply accumulates all byte sums into the top byte.
uint64_t x = secret [ j ] ^ secret [ i ] ;
x - = ( x > > 1 ) & 0x5555555555555555 ;
x = ( x & 0x3333333333333333 ) + ( ( x > > 2 ) & 0x3333333333333333 ) ;
x = ( x + ( x > > 4 ) ) & 0x0f0f0f0f0f0f0f0f ;
x = ( x * 0x0101010101010101 ) > > 56 ;
if ( x ! = 32 ) { ok = 0 ; break ; }
# endif
// Trial division: reject the candidate if any odd j in [3, 2^32) divides it.
// NOTE(review): as placed, these two lines sit inside the braces of the comparison
// loop above — confirm whether they belong after it, or were meant to be removed.
if ( ! ok ) continue ;
for ( uint64_t j = 3 ; j < 0x100000000ull ; j + = 2 ) if ( secret [ i ] % j = = 0 ) { ok = 0 ; break ; }
}
} while ( ! ok ) ;
}
}
# endif
/* The Unlicense
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>
*/