From 8027830475c781c5538e5466f30434c08a840ccd Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sat, 13 Dec 2014 12:37:21 +0100 Subject: [PATCH] Reduce binloader RAM requirements by over 40%. --- Review notes (below the separator, so not part of the commit message): (1) binloader.o now lists arena.h, the header binloader.cpp includes, as a prerequisite instead of arena.cpp. (2) The Element initializer in main() now passes file_num before timestamp, matching the member order this patch introduces; the previous order stored the timestamp in file_num and the file number in timestamp. (3) The index lines still carry the blob IDs of the unamended patch. Makefile | 6 +++--- arena.cpp | 33 +++++++++++++++++++++++++++++++++ arena.h | 24 ++++++++++++++++++++++++ binloader.cpp | 51 +++++++++++++++++++++++++++++++++++---------------- 4 files changed, 95 insertions(+), 19 deletions(-) create mode 100644 arena.cpp create mode 100644 arena.h diff --git a/Makefile b/Makefile index cd85e4f..9d133ec 100644 --- a/Makefile +++ b/Makefile @@ -4,11 +4,11 @@ PROTOC=protoc all: binloader binlookup binmerger -binloader: binloader.o hash.o count.pb.o +binloader: binloader.o hash.o arena.o count.pb.o binmerger: binmerger.o count.pb.o binlookup: binlookup.o hash.o count.pb.o -binloader.o: binloader.cpp count.pb.h +binloader.o: binloader.cpp arena.h count.pb.h binmerger.o: binmerger.cpp count.pb.h binlookup.o: binlookup.cpp count.pb.h @@ -17,4 +17,4 @@ binlookup.o: binlookup.cpp count.pb.h .PHONY: clean clean: - $(RM) binloader binlookup binmerger binloader.o binmerger.o binlookup.o hash.o count.pb.o count.pb.h count.pb.cc + $(RM) binloader binlookup binmerger binloader.o binmerger.o binlookup.o hash.o arena.o count.pb.o count.pb.h count.pb.cc diff --git a/arena.cpp b/arena.cpp new file mode 100644 index 0000000..048e6db --- /dev/null +++ b/arena.cpp @@ -0,0 +1,33 @@ +#include +#include +#include "arena.h" + +Arena::Arena() : first(NULL) {} + +Arena::~Arena() +{ + Block *next; + for (Block *b = first; b != NULL; b = next) { + delete[] b->memory; + + next = b->next; + delete b; + } +} + +char *Arena::alloc(size_t bytes) +{ + assert(bytes < BLOCK_SIZE); // Can fix, but we don't need to. 
+ + if (first == NULL || first->used + bytes > BLOCK_SIZE) { + Block *b = new Block; + b->memory = new char[BLOCK_SIZE]; + b->used = 0; + b->next = first; + first = b; + } + + char *ret = first->memory + first->used; + first->used += bytes; + return ret; +} diff --git a/arena.h b/arena.h new file mode 100644 index 0000000..5d254dd --- /dev/null +++ b/arena.h @@ -0,0 +1,24 @@ +#ifndef _ARENA_H +#define _ARENA_H + +// A simple arena for allocating lots of short strings. + +class Arena { +public: + Arena(); + ~Arena(); + + char *alloc(size_t bytes); + +private: + static constexpr size_t BLOCK_SIZE = 1048576; + + struct Block { + char *memory; + size_t used; + Block *next; + }; + Block *first; +}; + +#endif // _ARENA_H diff --git a/binloader.cpp b/binloader.cpp index 8660e8e..272fc30 100644 --- a/binloader.cpp +++ b/binloader.cpp @@ -12,24 +12,36 @@ #include #include #include "count.pb.h" +#include "arena.h" #include "hash.h" #define DUMMY_TIMESTAMP 32503680000 using namespace std; +Arena arena; + enum Result { WHITE = 0, DRAW, BLACK }; struct Element { - string bpfen; - string move; + char *bpfen; + int bpfen_len; + char move[8]; // Na1xc3+ Result result; int opening_num, white_elo, black_elo; - time_t timestamp; int file_num; + time_t timestamp; long start_position; bool operator< (const Element& other) const { - return bpfen < other.bpfen; + int shared_len = min(bpfen_len, other.bpfen_len); + int s = memcmp(bpfen, other.bpfen, shared_len); + if (s < 0) { + return true; + } else if (s > 0) { + return false; + } else { + return bpfen_len < other.bpfen_len; + } } }; @@ -48,14 +60,13 @@ int main(int argc, char **argv) exit(1); } for ( ;; ) { - int l = getc(fp); - if (l == -1) { + int bpfen_len = getc(fp); + if (bpfen_len == -1) { break; } - string bpfen; - bpfen.resize(l); - if (fread(&bpfen[0], l, 1, fp) != 1) { + char *bpfen = arena.alloc(bpfen_len); + if (fread(bpfen, bpfen_len, 1, fp) != 1) { perror("fread()"); // exit(1); break; @@ -103,20 +114,26 @@ int main(int 
argc, char **argv) } - l = getc(fp); + char move[8]; + int l = getc(fp); if (l == -1) { break; } - string move; - move.resize(l); + if (l >= int(sizeof(move))) { + fprintf(stderr, "Overlong move (%d bytes)\n", l); + // exit(1); + break; + } if (fread(&move[0], l, 1, fp) != 1) { perror("fread()"); // exit(1); break; } + move[l] = 0; - int bucket = hash_key_to_bucket(bpfen.data(), bpfen.size(), num_buckets); - elems[bucket].emplace_back(Element {std::move(bpfen), std::move(move), Result(r), opening_num, white_elo, black_elo, timestamp, file_num, start_position}); + int bucket = hash_key_to_bucket(bpfen, bpfen_len, num_buckets); + elems[bucket].emplace_back(Element {bpfen, bpfen_len, {}, Result(r), opening_num, white_elo, black_elo, file_num, timestamp, start_position}); + strcpy(elems[bucket].back().move, move); ++num_elems; } fclose(fp); @@ -166,10 +183,12 @@ int main(int argc, char **argv) moves.insert(e.move); c.add_move(e.move); } - if (j == elems[i].size() - 1 || e.bpfen != elems[i][j + 1].bpfen) { + if (j == elems[i].size() - 1 || + e.bpfen_len != elems[i][j + 1].bpfen_len || + memcmp(e.bpfen, elems[i][j + 1].bpfen, e.bpfen_len) != 0) { c.SerializeToString(&buf); mtbl_writer_add(mtbl, - (const uint8_t *)e.bpfen.data(), e.bpfen.size(), + (const uint8_t *)e.bpfen, e.bpfen_len, (const uint8_t *)buf.data(), buf.size()); c = Count(); moves.clear(); -- 2.39.2