git.sesse.net Git - remoteglot-book/commitdiff
Reduce binloader RAM requirements by over 40%.
author: Steinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 13 Dec 2014 11:37:21 +0000 (12:37 +0100)
committer: Steinar H. Gunderson <sgunderson@bigfoot.com>
Sat, 13 Dec 2014 11:57:19 +0000 (12:57 +0100)
Makefile
arena.cpp [new file with mode: 0644]
arena.h [new file with mode: 0644]
binloader.cpp

index cd85e4fdeebb10acbe1a71d2f8cf45561c642c3d..9d133eca5c6f2a0817d8a1e1d62effa8b4ac1712 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -4,11 +4,11 @@ PROTOC=protoc
 
 all: binloader binlookup binmerger
 
-binloader: binloader.o hash.o count.pb.o
+binloader: binloader.o hash.o arena.o count.pb.o
 binmerger: binmerger.o count.pb.o
 binlookup: binlookup.o hash.o count.pb.o
 
-binloader.o: binloader.cpp count.pb.h
+binloader.o: binloader.cpp arena.h count.pb.h  # arena.h is what binloader.cpp includes
 binmerger.o: binmerger.cpp count.pb.h
 binlookup.o: binlookup.cpp count.pb.h
 
@@ -17,4 +17,4 @@ binlookup.o: binlookup.cpp count.pb.h
 
 .PHONY: clean
 clean:
-       $(RM) binloader binlookup binmerger binloader.o binmerger.o binlookup.o hash.o count.pb.o count.pb.h count.pb.cc
+       $(RM) binloader binlookup binmerger binloader.o binmerger.o binlookup.o hash.o arena.o count.pb.o count.pb.h count.pb.cc
diff --git a/arena.cpp b/arena.cpp
new file mode 100644 (file)
index 0000000..048e6db
--- /dev/null
+++ b/arena.cpp
@@ -0,0 +1,33 @@
+#include <stdlib.h>
+#include <assert.h>
+#include "arena.h"
+
+Arena::Arena() : first(NULL) {}  // Empty block list; alloc() creates the first block on demand.
+
+Arena::~Arena()
+{
+       // Unlink each block in turn, then free its buffer and the node itself.
+       while (first != NULL) {
+               Block *doomed = first;
+               first = doomed->next;
+               delete[] doomed->memory;
+               delete doomed;
+       }
+}
+
+char *Arena::alloc(size_t bytes)
+{
+       // A request must be smaller than one block (checked only when asserts are enabled).
+       assert(bytes < BLOCK_SIZE);
+       // Push a fresh block when there is none yet or the head cannot fit the request.
+       const bool head_fits = (first != NULL) && (first->used + bytes <= BLOCK_SIZE);
+       if (!head_fits) {
+               Block *fresh = new Block;
+               fresh->memory = new char[BLOCK_SIZE];
+               fresh->used = 0;
+               fresh->next = first;
+               first = fresh;
+       }
+       first->used += bytes;
+       return first->memory + (first->used - bytes);
+}
diff --git a/arena.h b/arena.h
new file mode 100644 (file)
index 0000000..5d254dd
--- /dev/null
+++ b/arena.h
@@ -0,0 +1,24 @@
+#ifndef _ARENA_H
+#define _ARENA_H
+#include <stddef.h>  // size_t; keeps this header self-contained.
+// A simple arena for allocating lots of short strings; freed all at once on destruction.
+class Arena {
+public:
+       Arena();
+       ~Arena();
+       // The arena owns raw blocks, so copying would free each of them twice.
+       Arena(const Arena &) = delete;
+       Arena &operator=(const Arena &) = delete;
+       // Returns <bytes> bytes (must be < BLOCK_SIZE) of uninitialized storage.
+       char *alloc(size_t bytes);
+
+private:
+       static constexpr size_t BLOCK_SIZE = 1048576;  // Capacity of each block.
+       struct Block {
+               char *memory;  // BLOCK_SIZE bytes.
+               size_t used;   // Bytes of <memory> already handed out.
+               Block *next;   // Block that was the head before this one, or NULL.
+       };
+       Block *first;  // Block currently being filled (head of the list), or NULL.
+};
+#endif  // _ARENA_H
index 8660e8e299bd93f70f19767d671af0e6d8288f80..272fc30c900c32ee9d788b757683774b0f5c97f4 100644 (file)
 #include <unordered_set>
 #include <string.h>
 #include "count.pb.h"
+#include "arena.h"
 #include "hash.h"
 
 #define DUMMY_TIMESTAMP 32503680000
 
 using namespace std;
 
+Arena arena;
+
 enum Result { WHITE = 0, DRAW, BLACK };
 struct Element {
-       string bpfen;
-       string move;
+       char *bpfen;
+       int bpfen_len;
+       char move[8];   // Na1xc3+
        Result result;
        int opening_num, white_elo, black_elo;
-       time_t timestamp;
        int file_num;
+       time_t timestamp;
        long start_position;
 
        bool operator< (const Element& other) const {
-               return bpfen < other.bpfen;
+               int shared_len = min(bpfen_len, other.bpfen_len);
+               int s = memcmp(bpfen, other.bpfen, shared_len);
+               if (s < 0) {
+                       return true;
+               } else if (s > 0) {
+                       return false;
+               } else {
+                       return bpfen_len < other.bpfen_len;
+               }
        }
 };
 
@@ -48,14 +60,13 @@ int main(int argc, char **argv)
                        exit(1);
                }
                for ( ;; ) {
-                       int l = getc(fp);
-                       if (l == -1) {
+                       int bpfen_len = getc(fp);
+                       if (bpfen_len == -1) {
                                break;
                        }
                
-                       string bpfen;
-                       bpfen.resize(l);
-                       if (fread(&bpfen[0], l, 1, fp) != 1) {
+                       char *bpfen = arena.alloc(bpfen_len);
+                       if (fread(bpfen, bpfen_len, 1, fp) != 1) {
                                perror("fread()");
                //              exit(1);
                                break;
@@ -103,20 +114,26 @@ int main(int argc, char **argv)
                        }
 
 
-                       l = getc(fp);
+                       char move[8];
+                       int l = getc(fp);
                        if (l == -1) {
                                break;
                        }
-                       string move;
-                       move.resize(l);
+                       if (l >= int(sizeof(move))) {
+                               fprintf(stderr, "Overlong move (%d bytes)\n", l);
+               //              exit(1);
+                               break;
+                       }
                        if (fread(&move[0], l, 1, fp) != 1) {
                                perror("fread()");
                //              exit(1);
                                break;
                        }
+                       move[l] = 0;
 
-                       int bucket = hash_key_to_bucket(bpfen.data(), bpfen.size(), num_buckets);
-                       elems[bucket].emplace_back(Element {std::move(bpfen), std::move(move), Result(r), opening_num, white_elo, black_elo, timestamp, file_num, start_position});
+                       int bucket = hash_key_to_bucket(bpfen, bpfen_len, num_buckets);
+                       elems[bucket].emplace_back(Element {bpfen, bpfen_len, {}, Result(r), opening_num, white_elo, black_elo, timestamp, file_num, start_position});
+                       strcpy(elems[bucket].back().move, move);
                        ++num_elems;
                }
                fclose(fp);
@@ -166,10 +183,12 @@ int main(int argc, char **argv)
                                moves.insert(e.move);
                                c.add_move(e.move);
                        }
-                       if (j == elems[i].size() - 1 || e.bpfen != elems[i][j + 1].bpfen) {
+                       if (j == elems[i].size() - 1 ||
+                           e.bpfen_len != elems[i][j + 1].bpfen_len ||
+                           memcmp(e.bpfen, elems[i][j + 1].bpfen, e.bpfen_len) != 0) {
                                c.SerializeToString(&buf);
                                mtbl_writer_add(mtbl,
-                                       (const uint8_t *)e.bpfen.data(), e.bpfen.size(),
+                                       (const uint8_t *)e.bpfen, e.bpfen_len,
                                        (const uint8_t *)buf.data(), buf.size());
                                c = Count();
                                moves.clear();