#include <fcntl.h>
#include <functional>
#include <getopt.h>
+#include <limits.h>
#include <memory>
#include <stdio.h>
#include <string.h>
// Debug tracing is compiled out by default: dprintf(...) expands to nothing.
// To enable it, swap the active definition for the commented-out one below,
// which forwards the arguments to fprintf(stderr, ...).
#define dprintf(...)
//#define dprintf(...) fprintf(stderr, __VA_ARGS__);
+#include "turbopfor.h"
+
// Database path; the usage text prints this value as the default database.
const char *dbpath = "/var/lib/mlocate/plocate.db";
// NOTE(review): presumably selects NUL-separated output; no use of this flag
// is visible in this excerpt -- confirm against the option parsing code.
bool print_nul = false;
// Returns the number of filename blocks in the corpus.
// In this patch excerpt, the '-' lines are the previous implementation, which
// read one uint64_t at hdr.filename_index_offset_bytes with complete_pread()
// and derived the count from it (minus one sentinel entry). The '+' line is
// the replacement, which returns hdr.num_docids directly.
// NOTE(review): this presumes the header's num_docids equals the number of
// filename blocks -- confirm against the database header format.
size_t Corpus::get_num_filename_blocks() const
{
- // The beginning of the filename blocks is the end of the filename index blocks.
- uint64_t end;
- complete_pread(fd, &end, sizeof(end), hdr.filename_index_offset_bytes);
-
- // Subtract the sentinel block.
- return (end - hdr.filename_index_offset_bytes) / sizeof(uint64_t) - 1;
+ return hdr.num_docids;
}
size_t scan_file_block(const vector<string> &needles, string_view compressed,
dprintf("Corpus init done after %.1f ms.\n", 1e3 * duration<float>(steady_clock::now() - start).count());
vector<pair<Trigram, size_t>> trigrams;
+ uint64_t shortest_so_far = numeric_limits<uint32_t>::max();
for (const string &needle : needles) {
if (needle.size() < 3)
continue;
for (size_t i = 0; i < needle.size() - 2; ++i) {
uint32_t trgm = read_trigram(needle, i);
- corpus.find_trigram(trgm, [trgm, &trigrams](const Trigram *trgmptr, size_t len) {
+ corpus.find_trigram(trgm, [trgm, &trigrams, &shortest_so_far](const Trigram *trgmptr, size_t len) {
if (trgmptr == nullptr) {
- dprintf("trigram %06x isn't found, we abort the search\n", trgm);
+ dprintf("trigram '%c%c%c' isn't found, we abort the search\n",
+ trgm & 0xff, (trgm >> 8) & 0xff, (trgm >> 16) & 0xff);
exit(0);
}
- trigrams.emplace_back(*trgmptr, len);
+ if (trgmptr->num_docids > shortest_so_far * 100) {
+ dprintf("not loading trigram '%c%c%c' with %u docids, it would be ignored later anyway\n",
+ trgm & 0xff, (trgm >> 8) & 0xff, (trgm >> 16) & 0xff,
+ trgmptr->num_docids);
+ } else {
+ trigrams.emplace_back(*trgmptr, len);
+ shortest_so_far = std::min<uint64_t>(shortest_so_far, trgmptr->num_docids);
+ }
});
}
}
unsigned char *pldata = reinterpret_cast<unsigned char *>(s.data());
if (in1.empty()) {
in1.resize(num + 128);
- p4nd1dec128v32(pldata, num, &in1[0]);
+ decode_pfor_delta1<128>(pldata, num, /*interleaved=*/true, &in1[0]);
in1.resize(num);
dprintf("trigram '%c%c%c' (%zu bytes) decoded to %zu entries\n", trgm & 0xff,
(trgm >> 8) & 0xff, (trgm >> 16) & 0xff, len, num);
if (in2.size() < num + 128) {
in2.resize(num + 128);
}
- p4nd1dec128v32(pldata, num, &in2[0]);
+ decode_pfor_delta1<128>(pldata, num, /*interleaved=*/true, &in2[0]);
out.clear();
set_intersection(in1.begin(), in1.end(), in2.begin(), in2.begin() + num,
{
// The help text comes from mlocate.
printf("Usage: plocate [OPTION]... PATTERN...\n");
+ printf("\n");
printf(" -d, --database DBPATH use DBPATH instead of default database (which is\n");
printf(" %s)\n", dbpath);
printf(" -h, --help print this help\n");