+bool is_prime(uint32_t x)
+{
+ if ((x % 2) == 0 || (x % 3) == 0) {
+ return false;
+ }
+ uint32_t limit = ceil(sqrt(x));
+ for (uint32_t factor = 5; factor <= limit; ++factor) {
+ if ((x % factor) == 0) {
+ return false;
+ }
+ }
+ return true;
+}
+
+uint32_t next_prime(uint32_t x)
+{
+ if ((x % 2) == 0) {
+ ++x;
+ }
+ while (!is_prime(x)) {
+ x += 2;
+ }
+ return x;
+}
+
+unique_ptr<Trigram[]> create_hashtable(Corpus &corpus, const vector<uint32_t> &all_trigrams, uint32_t ht_size, uint32_t num_overflow_slots)
+{
+ unique_ptr<Trigram[]> ht(new Trigram[ht_size + num_overflow_slots + 1]); // 1 for the sentinel element at the end.
+ for (unsigned i = 0; i < ht_size + num_overflow_slots + 1; ++i) {
+ ht[i].trgm = uint32_t(-1);
+ ht[i].num_docids = 0;
+ ht[i].offset = 0;
+ }
+ for (uint32_t trgm : all_trigrams) {
+ // We don't know offset yet, so set it to zero.
+ Trigram to_insert{ trgm, uint32_t(corpus.get_pl_builder(trgm).num_docids), 0 };
+
+ uint32_t bucket = hash_trigram(trgm, ht_size);
+ unsigned distance = 0;
+ while (ht[bucket].num_docids != 0) {
+ // Robin Hood hashing; reduces the longest distance by a lot.
+ unsigned other_distance = bucket - hash_trigram(ht[bucket].trgm, ht_size);
+ if (distance > other_distance) {
+ swap(to_insert, ht[bucket]);
+ distance = other_distance;
+ }
+
+ ++bucket, ++distance;
+ if (distance > num_overflow_slots) {
+ return nullptr;
+ }
+ }
+ ht[bucket] = to_insert;
+ }
+ return ht;
+}
+