mirror of https://github.com/trapexit/mergerfs.git
Browse Source
optimize readdir file dedup
optimize readdir file dedup
Use fasthash64 to hash filenames to uint64_t values and store them in a khash set. This significantly reduces malloc/free calls and memory usage.
pull/530/head
Antonio SJ Musumeci
6 years ago
5 changed files with 192 additions and 48 deletions
-
84 src/fasthash.cpp
-
56 src/fasthash.h
-
39 src/hashset.hpp
-
36 src/khash.h
-
5 src/readdir.cpp
@ -0,0 +1,84 @@ |
|||
/* The MIT License
|
|||
|
|||
Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com) |
|||
|
|||
Permission is hereby granted, free of charge, to any person |
|||
obtaining a copy of this software and associated documentation |
|||
files (the "Software"), to deal in the Software without |
|||
restriction, including without limitation the rights to use, copy, |
|||
modify, merge, publish, distribute, sublicense, and/or sell copies |
|||
of the Software, and to permit persons to whom the Software is |
|||
furnished to do so, subject to the following conditions: |
|||
|
|||
The above copyright notice and this permission notice shall be |
|||
included in all copies or substantial portions of the Software. |
|||
|
|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
|||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
|||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
|||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
SOFTWARE. |
|||
*/ |
|||
|
|||
#include "fasthash.h"

#include <string.h>
|
|||
|
|||
// Compression function for Merkle-Damgard construction.
// This function is generated using the framework provided.
//
// Scrambles a 64-bit word with xor-shift, multiply, xor-shift and
// returns the result. It is a real function rather than a GNU
// statement-expression macro, so it is portable and never modifies
// the caller's variable.
static inline
uint64_t
mix(uint64_t h)
{
  h ^= h >> 23;
  h *= 0x2127599bf4325c37ULL;
  h ^= h >> 47;

  return h;
}

/**
 * fasthash64 - 64-bit implementation of fasthash
 * @buf:  data buffer; accessed bytewise, so no alignment is required
 * @len:  data size in bytes
 * @seed: the seed, folded into the initial hash state
 *
 * The buffer is consumed in 8-byte words loaded in native byte order.
 * The remaining 1-7 bytes, if any, are packed into one final word with
 * the first trailing byte in the least significant position.
 *
 * Returns the 64-bit hash value.
 */
uint64_t
fasthash64(const void *buf,
           size_t      len,
           uint64_t    seed)
{
  const uint64_t m = 0x880355f21e6d1965ULL;
  const unsigned char *pos = (const unsigned char *)buf;
  const unsigned char *const end = pos + (len - (len & 7));
  size_t tail = len & 7;
  uint64_t h = seed ^ (len * m);
  uint64_t v;

  // Load each word with memcpy instead of dereferencing a casted
  // uint64_t pointer: the latter is undefined behaviour when buf is
  // not 8-byte aligned or does not actually hold uint64_t objects.
  while (pos != end) {
    memcpy(&v, pos, sizeof(v));
    pos += sizeof(v);
    h ^= mix(v);
    h *= m;
  }

  // Fold in the trailing bytes; byte i lands in bits [8*i, 8*i+7].
  if (tail != 0) {
    v = 0;
    while (tail-- != 0) {
      v |= (uint64_t)pos[tail] << (8 * tail);
    }
    h ^= mix(v);
    h *= m;
  }

  return mix(h);
}
|||
|
|||
/**
 * fasthash32 - 32-bit implementation of fasthash
 * @buf:  data buffer
 * @len:  data size
 * @seed: the seed, passed on to fasthash64
 *
 * Returns the 64-bit fasthash of the buffer reduced to 32 bits.
 */
uint32_t
fasthash32(const void *buf,
           size_t      len,
           uint32_t    seed)
{
  // Reduce the 64-bit hashcode to a Fermat residue so that the result
  // retains information from both its upper and lower halves.
  const uint64_t hash64 = fasthash64(buf, len, seed);
  const uint64_t folded = hash64 - (hash64 >> 32);

  return (uint32_t)folded;
}
@ -0,0 +1,56 @@ |
|||
/* The MIT License |
|||
|
|||
Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com) |
|||
|
|||
Permission is hereby granted, free of charge, to any person |
|||
obtaining a copy of this software and associated documentation |
|||
files (the "Software"), to deal in the Software without |
|||
restriction, including without limitation the rights to use, copy, |
|||
modify, merge, publish, distribute, sublicense, and/or sell copies |
|||
of the Software, and to permit persons to whom the Software is |
|||
furnished to do so, subject to the following conditions: |
|||
|
|||
The above copyright notice and this permission notice shall be |
|||
included in all copies or substantial portions of the Software. |
|||
|
|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
|||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
|||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
|||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
SOFTWARE. |
|||
*/ |
|||
|
|||
/*
 * Guard name avoids the reserved "_" + uppercase identifier space.
 */
#ifndef FASTHASH_H
#define FASTHASH_H

#include <stddef.h> /* size_t */
#include <stdint.h> /* uint32_t, uint64_t */
#include <stdio.h>  /* kept so existing includers still see it */

#ifdef __cplusplus
extern "C" {
#endif

/**
 * fasthash32 - 32-bit implementation of fasthash
 * @buf:  data buffer
 * @len:  data size
 * @seed: the seed
 *
 * Returns the 32-bit hash value.
 */
uint32_t fasthash32(const void *buf, size_t len, uint32_t seed);

/**
 * fasthash64 - 64-bit implementation of fasthash
 * @buf:  data buffer
 * @len:  data size
 * @seed: the seed
 *
 * Returns the 64-bit hash value.
 */
uint64_t fasthash64(const void *buf, size_t len, uint64_t seed);

#ifdef __cplusplus
}
#endif

#endif /* FASTHASH_H */
Write
Preview
Loading…
Cancel
Save
Reference in new issue