From fdecabfaabfef9e55f6f660150c94779fa0de88c Mon Sep 17 00:00:00 2001 From: Antonio SJ Musumeci Date: Mon, 21 Aug 2023 00:18:41 -0500 Subject: [PATCH] Update wyhash to final4 --- DEPENDENCIES | 1 + src/fs_mktemp.cpp | 7 +- src/gidcache.cpp | 4 +- src/rnd.cpp | 6 +- src/wyhash.h | 245 +++++++++++++++++++++++++++++++--------------- 5 files changed, 181 insertions(+), 82 deletions(-) diff --git a/DEPENDENCIES b/DEPENDENCIES index fef17e89..42edc7b7 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -3,6 +3,7 @@ ### included in repo * libfuse: https://github.com/libfuse/libfuse (heavily modified fork of v2.x) +* wyhash: https://github.com/wangyi-fudan/wyhash * ghc::filesystem: https://github.com/gulrak/filesystem * nonstd::optional: https://github.com/martinmoene/optional-lite * fmt: https://github.com/fmtlib/fmt diff --git a/src/fs_mktemp.cpp b/src/fs_mktemp.cpp index 7bdd7c26..39e45ebe 100644 --- a/src/fs_mktemp.cpp +++ b/src/fs_mktemp.cpp @@ -19,6 +19,7 @@ #include "errno.hpp" #include "fs_open.hpp" #include "fs_path.hpp" +#include "rnd.hpp" #include @@ -28,7 +29,9 @@ #define PAD_LEN 16 #define MAX_ATTEMPTS 3 -static char const CHARS[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; +static char const CHARS[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; +static size_t const CHARS_SIZE = (sizeof(CHARS) - 1); + namespace l { @@ -41,7 +44,7 @@ namespace l filename = '.'; for(int i = 0; i < PAD_LEN; i++) - filename += CHARS[std::rand() % (sizeof(CHARS) - 1)]; + filename += CHARS[RND::rand64(CHARS_SIZE)]; path = base_; path /= filename; diff --git a/src/gidcache.cpp b/src/gidcache.cpp index 9b3f8e91..dbd61f06 100644 --- a/src/gidcache.cpp +++ b/src/gidcache.cpp @@ -14,6 +14,8 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +#include "rnd.hpp" + #include #include #include @@ -81,7 +83,7 @@ GIDRecord * GIDCache::allocrec(void) { if(size == MAXRECS) - return &recs[rand() % MAXRECS]; + return &recs[RND::rand64(MAXRECS)]; else return &recs[size++]; } diff --git a/src/rnd.cpp b/src/rnd.cpp index 9a432d56..fd8d07a3 100644 --- a/src/rnd.cpp +++ b/src/rnd.cpp @@ -26,9 +26,11 @@ static uint64_t G_SEED; -static RND G_RND; -RND::RND() +__attribute__((constructor)) +static +void +_constructor() { struct timeval tv; diff --git a/src/wyhash.h b/src/wyhash.h index daae691a..7df33d4d 100644 --- a/src/wyhash.h +++ b/src/wyhash.h @@ -1,140 +1,231 @@ -//Author: Wang Yi -#ifndef wyhash_final_version -#define wyhash_final_version -//defines that change behavior +// This is free and unencumbered software released into the public domain under The Unlicense (http://unlicense.org/) +// main repo: https://github.com/wangyi-fudan/wyhash +// author: 王一 Wang Yi +// contributors: Reini Urban, Dietrich Epp, Joshua Haberman, Tommy Ettinger, Daniel Lemire, Otmar Ertl, cocowalla, leo-yuriev, Diego Barrios Romero, paulie-g, dumblob, Yann Collet, ivte-ms, hyb, James Z.M. Gao, easyaspi314 (Devin), TheOneric + +/* quick example: + string s="fjsakfdsjkf"; + uint64_t hash=wyhash(s.c_str(), s.size(), 0, _wyp); +*/ + +#ifndef wyhash_final_version_4 +#define wyhash_final_version_4 + #ifndef WYHASH_CONDOM -#define WYHASH_CONDOM 1 //0: read 8 bytes before and after boundaries, dangerous but faster. 1: normal valid behavior 2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication" +//protections that produce different results: +//1: normal valid behavior +//2: extra protection against entropy loss (probability=2^-63), aka. 
"blind multiplication" +#define WYHASH_CONDOM 1 #endif -#define WYHASH_32BIT_MUM 0 //faster on 32 bit system + +#ifndef WYHASH_32BIT_MUM +//0: normal version, slow on 32 bit systems +//1: faster on 32 bit systems but produces different results, incompatible with wy2u0k function +#define WYHASH_32BIT_MUM 0 +#endif + //includes #include #include #if defined(_MSC_VER) && defined(_M_X64) -#include -#pragma intrinsic(_umul128) + #include + #pragma intrinsic(_umul128) #endif + +//likely and unlikely macros #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) -#define _likely_(x) __builtin_expect(x,1) -#define _unlikely_(x) __builtin_expect(x,0) + #define _likely_(x) __builtin_expect(x,1) + #define _unlikely_(x) __builtin_expect(x,0) #else -#define _likely_(x) (x) -#define _unlikely_(x) (x) + #define _likely_(x) (x) + #define _unlikely_(x) (x) #endif -//mum function + +//128bit multiply function static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); } static inline void _wymum(uint64_t *A, uint64_t *B){ #if(WYHASH_32BIT_MUM) - uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(unsigned)*B, lh=(unsigned)*A*(*B>>32), ll=(uint64_t)(unsigned)*A*(unsigned)*B; -#if(WYHASH_CONDOM>1) + uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(uint32_t)*B, lh=(uint32_t)*A*(*B>>32), ll=(uint64_t)(uint32_t)*A*(uint32_t)*B; + #if(WYHASH_CONDOM>1) *A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll; -#else + #else *A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll; -#endif + #endif #elif defined(__SIZEOF_INT128__) - __uint128_t r=*A; r*=*B; -#if(WYHASH_CONDOM>1) + __uint128_t r=*A; r*=*B; + #if(WYHASH_CONDOM>1) *A^=(uint64_t)r; *B^=(uint64_t)(r>>64); -#else + #else *A=(uint64_t)r; *B=(uint64_t)(r>>64); -#endif + #endif #elif defined(_MSC_VER) && defined(_M_X64) -#if(WYHASH_CONDOM>1) + #if(WYHASH_CONDOM>1) uint64_t a, b; a=_umul128(*A,*B,&b); *A^=a; *B^=b; -#else + #else *A=_umul128(*A,*B,B); -#endif + #endif #else uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo; uint64_t 
rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t>32)+(rm1>>32)+c; -#if(WYHASH_CONDOM>1) + #if(WYHASH_CONDOM>1) *A^=lo; *B^=hi; -#else + #else *A=lo; *B=hi; -#endif + #endif #endif } + +//multiply and xor mix function, aka MUM static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; } -//read functions + +//endian macros #ifndef WYHASH_LITTLE_ENDIAN -#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -#define WYHASH_LITTLE_ENDIAN 1 -#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#define WYHASH_LITTLE_ENDIAN 0 -#endif + #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define WYHASH_LITTLE_ENDIAN 1 + #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + #define WYHASH_LITTLE_ENDIAN 0 + #else + #warning could not determine endianness! Falling back to little endian. 
+ #define WYHASH_LITTLE_ENDIAN 1 + #endif #endif + +//read functions #if (WYHASH_LITTLE_ENDIAN) static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;} -static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;} +static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v;} #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);} -static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);} +static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return __builtin_bswap32(v);} #elif defined(_MSC_VER) static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);} -static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);} +static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return _byteswap_ulong(v);} +#else +static inline uint64_t _wyr8(const uint8_t *p) { + uint64_t v; memcpy(&v, p, 8); + return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000)); +} +static inline uint64_t _wyr4(const uint8_t *p) { + uint32_t v; memcpy(&v, p, 4); + return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000)); +} #endif -static inline uint64_t _wyr3(const uint8_t *p, unsigned k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];} -//wyhash function -static inline uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){ -#if(WYHASH_CONDOM>0) - uint64_t a, b; - if(_likely_(i<=8)){ - if(_likely_(i>=4)){ a=_wyr4(p); 
b=_wyr4(p+i-4); } - else if (_likely_(i)){ a=_wyr3(p,i); b=0; } +static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];} +//wyhash main function +static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret){ + const uint8_t *p=(const uint8_t *)key; seed^=_wymix(seed^secret[0],secret[1]); uint64_t a, b; + if(_likely_(len<=16)){ + if(_likely_(len>=4)){ a=(_wyr4(p)<<32)|_wyr4(p+((len>>3)<<2)); b=(_wyr4(p+len-4)<<32)|_wyr4(p+len-4-((len>>3)<<2)); } + else if(_likely_(len>0)){ a=_wyr3(p,len); b=0;} else a=b=0; } - else{ a=_wyr8(p); b=_wyr8(p+i-8); } - return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed)); -#else -#define oneshot_shift ((i<8)*((8-i)<<3)) - return _wymix(secret[1]^len,_wymix((_wyr8(p)<>oneshot_shift)^seed)); -#endif + else{ + size_t i=len; + if(_unlikely_(i>48)){ + uint64_t see1=seed, see2=seed; + do{ + seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed); + see1=_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^see1); + see2=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see2); + p+=48; i-=48; + }while(_likely_(i>48)); + seed^=see1^see2; + } + while(_unlikely_(i>16)){ seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed); i-=16; p+=16; } + a=_wyr8(p+i-16); b=_wyr8(p+i-8); + } + a^=secret[1]; b^=seed; _wymum(&a,&b); + return _wymix(a^secret[0]^len,b^secret[1]); } -static inline uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){ - if(_likely_(i<=16)) return _wyfinish16(p,len,seed,secret,i); - return _wyfinish(p+16,len,_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed),secret,i-16); -} +//the default secret parameters +static const uint64_t _wyp[4] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull}; -static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){ - const uint8_t *p=(const uint8_t *)key; - uint64_t i=len; seed^=*secret; - 
if(_unlikely_(i>64)){ - uint64_t see1=seed; - do{ - seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed)^_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^seed); - see1=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see1)^_wymix(_wyr8(p+48)^secret[4],_wyr8(p+56)^see1); - p+=64; i-=64; - }while(i>64); - seed^=see1; - } - return _wyfinish(p,len,seed,secret,i); -} -//utility functions -const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full}; -static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=_wyp[0]; B^=_wyp[1]; _wymum(&A,&B); return _wymix(A^_wyp[0],B^_wyp[1]);} -static inline uint64_t wyrand(uint64_t *seed){ *seed+=_wyp[0]; return _wymix(*seed,*seed^_wyp[1]);} +//a useful 64bit-64bit mix function to produce deterministic pseudo random numbers that can pass BigCrush and PractRand +static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=0xa0761d6478bd642full; B^=0xe7037ed1a0b428dbull; _wymum(&A,&B); return _wymix(A^0xa0761d6478bd642full,B^0xe7037ed1a0b428dbull);} + +//The wyrand PRNG that pass BigCrush and PractRand +static inline uint64_t wyrand(uint64_t *seed){ *seed+=0xa0761d6478bd642full; return _wymix(*seed,*seed^0xe7037ed1a0b428dbull);} + +//convert any 64 bit pseudo random numbers to uniform distribution [0,1). It can be combined with wyrand, wyhash64 or wyhash. static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;} + +//convert any 64 bit pseudo random numbers to APPROXIMATE Gaussian distribution. It can be combined with wyrand, wyhash64 or wyhash. static inline double wy2gau(uint64_t r){ const double _wynorm=1.0/(1ull<<20); return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;} + +#ifdef WYTRNG +#include +//The wytrand true random number generator, passed BigCrush. 
+static inline uint64_t wytrand(uint64_t *seed){ + struct timeval t; gettimeofday(&t,0); + uint64_t teed=(((uint64_t)t.tv_sec)<<32)|t.tv_usec; + teed=_wymix(teed^_wyp[0],*seed^_wyp[1]); + *seed=_wymix(teed^_wyp[0],_wyp[2]); + return _wymix(*seed,*seed^_wyp[3]); +} +#endif + +#if(!WYHASH_32BIT_MUM) +//fast range integer random number generation on [0,k) credit to Daniel Lemire. May not work when WYHASH_32BIT_MUM=1. It can be combined with wyrand, wyhash64 or wyhash. +static inline uint64_t wy2u0k(uint64_t r, uint64_t k){ _wymum(&r,&k); return k; } +#endif + +//make your own secret static inline void make_secret(uint64_t seed, uint64_t *secret){ uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 }; - for(size_t i=0;i<5;i++){ + for(size_t i=0;i<4;i++){ uint8_t ok; do{ ok=1; secret[i]=0; for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<> 1) & 0x5555555555555555; + x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333); + x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f; + x = (x * 0x0101010101010101) >> 56; + if(x!=32){ ok=0; break; } #endif - if(!ok)continue; - for(uint64_t j=3;j<0x100000000ull;j+=2) if(secret[i]%j==0){ ok=0; break; } + } }while(!ok); } } + #endif + +/* The Unlicense +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. 
We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> +*/