From: Steinar H. Gunderson
Date: Thu, 11 Dec 2014 19:51:38 +0000 (+0100)
Subject: Change to even shorter prefix length; down from 3.8 to 3.1 GB (single partition is...
X-Git-Url: https://git.sesse.net/?p=remoteglot-book;a=commitdiff_plain;h=dd46a39c59caf1ed3c3c79073a4acbd73c1496d0

Change to even shorter prefix length; down from 3.8 to 3.1 GB (single partition is 3.0 GB).
---

diff --git a/hash.cpp b/hash.cpp
index af56263..cc15148 100644
--- a/hash.cpp
+++ b/hash.cpp
@@ -6,9 +6,6 @@ using namespace std;
 
 int hash_key_to_bucket(const char* s, size_t len, int num_buckets)
 {
-	// We hash only the first 10 bytes; it should be enough to get a
-	// reasonable spread, but also mostly miss the move, so that
-	// same position + different move usually land in the same bucket.
-	len = min(len, 10);
+	len = min(len, HASH_PREFIX_BYTES);
 	return util::Fingerprint32(s, len) % num_buckets;
 }
diff --git a/hash.h b/hash.h
index 9b9550a..39f01e0 100644
--- a/hash.h
+++ b/hash.h
@@ -1,6 +1,12 @@
 #ifndef _HASH_H
 #define _HASH_H 1
 
+// Hashing more or fewer bytes is a tradeoff between more even partitions
+// and total size (since seemingly key/prefix compression works better with
+// smaller values). This value seems to be very close to optimal wrt. size,
+// and has imbalances smaller than 2:1.
+#define HASH_PREFIX_BYTES 4
+
 int hash_key_to_bucket(const char* s, size_t len, int num_buckets);
 
 #endif  // !defined(_HASH_H)