diff --git a/src/fasthash.cpp b/src/fasthash.cpp new file mode 100644 index 00000000..2d327cdf --- /dev/null +++ b/src/fasthash.cpp @@ -0,0 +1,84 @@ +/* The MIT License + + Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com) + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#include "fasthash.h" + +// Compression function for Merkle-Damgard construction. +// This function is generated using the framework provided. +#define mix(h) ({ \ + (h) ^= (h) >> 23; \ + (h) *= 0x2127599bf4325c37ULL; \ + (h) ^= (h) >> 47; }) + +uint64_t +fasthash64(const void *buf, + size_t len, + uint64_t seed) +{ + const uint64_t m = 0x880355f21e6d1965ULL; + const uint64_t *pos = (const uint64_t *)buf; + const uint64_t *end = pos + (len / 8); + const unsigned char *pos2; + uint64_t h = seed ^ (len * m); + uint64_t v; + + while (pos != end) { + v = *pos++; + h ^= mix(v); + h *= m; + } + + pos2 = (const unsigned char*)pos; + v = 0; + + switch (len & 7) { + case 7: v ^= (uint64_t)pos2[6] << 48; + case 6: v ^= (uint64_t)pos2[5] << 40; + case 5: v ^= (uint64_t)pos2[4] << 32; + case 4: v ^= (uint64_t)pos2[3] << 24; + case 3: v ^= (uint64_t)pos2[2] << 16; + case 2: v ^= (uint64_t)pos2[1] << 8; + case 1: v ^= (uint64_t)pos2[0]; + h ^= mix(v); + h *= m; + } + + return mix(h); +} + +uint32_t +fasthash32(const void *buf, + size_t len, + uint32_t seed) +{ + // the following trick converts the 64-bit hashcode to Fermat + // residue, which shall retain information from both the higher + // and lower parts of hashcode. + uint64_t h; + + h = fasthash64(buf, len, seed); + + return (h - (h >> 32)); +} diff --git a/src/fasthash.h b/src/fasthash.h new file mode 100644 index 00000000..ed5b0bc1 --- /dev/null +++ b/src/fasthash.h @@ -0,0 +1,56 @@ +/* The MIT License + + Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com) + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef _FASTHASH_H +#define _FASTHASH_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * fasthash32 - 32-bit implementation of fasthash + * @buf: data buffer + * @len: data size + * @seed: the seed + */ + uint32_t fasthash32(const void *buf, size_t len, uint32_t seed); + + /** + * fasthash64 - 64-bit implementation of fasthash + * @buf: data buffer + * @len: data size + * @seed: the seed + */ + uint64_t fasthash64(const void *buf, size_t len, uint64_t seed); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/strset.hpp b/src/hashset.hpp similarity index 66% rename from src/strset.hpp rename to src/hashset.hpp index 9630cfc4..6f2370c4 100644 --- a/src/strset.hpp +++ b/src/hashset.hpp @@ -1,7 +1,7 @@ /* ISC License - Copyright (c) 2016, Antonio SJ Musumeci + Copyright (c) 2018, Antonio SJ Musumeci Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -19,44 +19,49 @@ #pragma once #include "khash.h" +#include "fasthash.h" -#include +KHASH_SET_INIT_INT64(hashset); -KHASH_SET_INIT_STR(strset); - -class StrSet +class HashSet { public: - StrSet() + HashSet() { - _set = kh_init(strset); + _set = kh_init(hashset); } - ~StrSet() + ~HashSet() { - for(khint_t k = kh_begin(_set), ek = kh_end(_set); k != ek; k++) - if(kh_exist(_set,k)) - ::free((char*)kh_key(_set,k)); - - kh_destroy(strset,_set); + kh_destroy(hashset,_set); } inline int - put(const char *str) + put(const char *str_) { int rv; + uint64_t h; khint_t key; - key = kh_put(strset,_set,str,&rv); + h = fasthash64(str_,strlen(str_),0x7472617065786974); + + key = kh_put(hashset,_set,h,&rv); if(rv == 0) return 0; - kh_key(_set,key) = ::strdup(str); + kh_key(_set,key) = h; return rv; } + inline + int + size(void) + { + return kh_size(_set); + } + private: - khash_t(strset) *_set; + khash_t(hashset) *_set; }; diff --git a/src/khash.h b/src/khash.h index c3121e82..f75f3474 100644 --- a/src/khash.h +++ b/src/khash.h @@ -130,7 +130,6 @@ int main() { #include /* compiler specific configuration */ -typedef unsigned char khint8_t; #if UINT_MAX == 0xffffffffu typedef unsigned int khint32_t; @@ -163,17 +162,15 @@ typedef unsigned long long khint64_t; typedef khint32_t khint_t; typedef khint_t khiter_t; -#define __ac_bit_empty 0b00000010 -#define __ac_bit_deleted 0b00000001 -#define __ac_bit_any 0b11111111 +#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) +#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) +#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) +#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) +#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) +#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) +#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) -#define __ac_isempty(flag, i) (flag[i]&__ac_bit_empty) -#define __ac_isdel(flag, i) (flag[i]&__ac_bit_deleted) -#define __ac_iseither(flag, i) (flag[i]&__ac_bit_any) -#define __ac_set_isdel_false(flag, i) (flag[i]&=~__ac_bit_deleted) -#define __ac_set_isempty_false(flag, i) (flag[i]&=~__ac_bit_empty) -#define __ac_set_isboth_false(flag, i) (flag[i]=0) -#define __ac_set_isdel_true(flag, i) (flag[i]|=__ac_bit_deleted) +#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4) #ifndef kroundup32 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) @@ -197,7 +194,7 @@ static const double __ac_HASH_UPPER = 0.77; #define __KHASH_TYPE(name, khkey_t, khval_t) \ typedef struct kh_##name##_s { \ khint_t n_buckets, size, n_occupied, upper_bound; \ - khint8_t *flags; \ + khint32_t *flags; \ khkey_t *keys; \ khval_t *vals; \ } kh_##name##_t; @@ -226,7 +223,7 @@ static const double __ac_HASH_UPPER = 0.77; SCOPE void kh_clear_##name(kh_##name##_t *h) \ { \ if (h && h->flags) { \ - memset(h->flags, __ac_bit_empty, h->n_buckets); \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ h->size = h->n_occupied = 0; \ } \ } \ @@ -246,16 +243,16 @@ static const double __ac_HASH_UPPER = 0.77; } \ SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ - khint8_t *new_flags = 0; \ + khint32_t *new_flags = 0; \ khint_t j = 1; \ { \ kroundup32(new_n_buckets); \ if (new_n_buckets < 4) new_n_buckets = 4; \ if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ else { /* hash table size to be changed (shrink or expand); rehash */ \ - new_flags = (khint8_t*)kmalloc(new_n_buckets); \ + new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ if (!new_flags) return -1; \ - memset(new_flags, __ac_bit_empty, new_n_buckets); \ + memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ if (h->n_buckets < new_n_buckets) { /* expand */ \ khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ if (!new_keys) { kfree(new_flags); return -1; } \ @@ -322,16 +319,19 @@ static const double __ac_HASH_UPPER = 0.77; { \ khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ - last = i; \ - while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ - if (__ac_isdel(h->flags, i)) site = i; \ - i = (i + (++step)) & mask; \ - if (i == last) { x = site; break; } \ - } \ - if (x == h->n_buckets) { \ - if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ - else x = i; \ - } \ + if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ + else { \ + last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (__ac_isdel(h->flags, i)) site = i; \ + i = (i + (++step)) & mask; \ + if (i == last) { x = site; break; } \ + } \ + if (x == h->n_buckets) { \ + if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ + else x = i; \ + } \ + } \ } \ if (__ac_isempty(h->flags, x)) { /* not present at all */ \ h->keys[x] = key; \ @@ -576,7 +576,7 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) code; \ } } -/* More conenient interfaces */ +/* More convenient interfaces */ /*! @function @abstract Instantiate a hash set containing integer keys @@ -594,7 +594,7 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key) KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) /*! @function - @abstract Instantiate a hash map containing 64-bit integer keys + @abstract Instantiate a hash set containing 64-bit integer keys @param name Name of the hash table [symbol] */ #define KHASH_SET_INIT_INT64(name) \ diff --git a/src/readdir.cpp b/src/readdir.cpp index 16932a88..29ad9411 100644 --- a/src/readdir.cpp +++ b/src/readdir.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include "config.hpp" @@ -33,9 +32,9 @@ #include "fs_devid.hpp" #include "fs_inode.hpp" #include "fs_path.hpp" +#include "hashset.hpp" #include "readdir.hpp" #include "rwlock.hpp" -#include "strset.hpp" #include "ugid.hpp" using std::string; @@ -50,7 +49,7 @@ _readdir(const Branches &branches_, void *buf, const fuse_fill_dir_t filler) { - StrSet names; + HashSet names; string basepath; struct stat st = {0};