// Maps a key to a bucket index.
//
//   s            pointer to the key bytes
//   len          number of key bytes available at s
//   num_buckets  number of buckets to distribute keys over
//
// Only a prefix of the key is hashed: len is capped before being passed to
// util::Fingerprint32, and the fingerprint is then reduced modulo num_buckets.
// Keys that share the hashed prefix therefore always map to the same bucket.
//
// NOTE(review): num_buckets == 0 would be a division by zero in the modulo
// below, and a negative value would be converted to unsigned if the
// fingerprint is an unsigned type; presumably callers always pass a positive
// count -- confirm.
int hash_key_to_bucket(const char* s, size_t len, int num_buckets)
{
- // We hash only the first 10 bytes; it should be enough to get a
- // reasonable spread, but also mostly miss the move, so that
- // same position + different move usually land in the same bucket.
- len = min<size_t>(len, 10);
+ len = min<size_t>(len, HASH_PREFIX_BYTES);
// Fingerprint the (possibly shortened) key and fold it into [0, num_buckets).
return util::Fingerprint32(s, len) % num_buckets;
}
#ifndef _HASH_H
#define _HASH_H 1
+// Upper bound on how many leading key bytes are hashed. Hashing more or fewer
+// bytes is a tradeoff between more even partitions and total size (key/prefix
+// compression seemingly works better with smaller values). This value seems to
+// be very close to optimal with respect to size, with imbalances below 2:1.
+#define HASH_PREFIX_BYTES 4
+
// Returns the bucket for the len-byte key at s when keys are spread over
// num_buckets buckets. The bucket is derived from a hash of the key's leading
// bytes (at most HASH_PREFIX_BYTES of them), taken modulo num_buckets.
int hash_key_to_bucket(const char* s, size_t len, int num_buckets);
#endif // !defined(_HASH_H)