From: Stéphane Nicolet Date: Tue, 27 Jun 2023 10:03:00 +0000 (+0200) Subject: Document the LEB128 patch X-Git-Url: https://git.sesse.net/?p=stockfish;a=commitdiff_plain;h=e355c7059468048bbb8b9f10e2b32606aa72eb77 Document the LEB128 patch Add some comments and harmonize style for the LEB128 patch. closes https://github.com/official-stockfish/Stockfish/pull/4642 No functional change --- diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index d338527d..e8ed2bc6 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -86,6 +86,7 @@ namespace Stockfish::Eval::NNUE { return (n + base - 1) / base * base; } + // read_little_endian() is our utility to read an integer (signed or unsigned, any size) // from a stream in little-endian order. We swap the byte order after the read if // necessary to return a result with the byte ordering of the compiling machine. @@ -110,6 +111,7 @@ namespace Stockfish::Eval::NNUE { return result; } + // write_little_endian() is our utility to write an integer (signed or unsigned, any size) // to a stream in little-endian order. We swap the byte order before the write if // necessary to always write in little endian order, independently of the byte @@ -140,6 +142,7 @@ namespace Stockfish::Eval::NNUE { } } + // read_little_endian(s, out, N) : read integers in bulk from a little indian stream. // This reads N integers from stream s and put them in array out. template @@ -151,6 +154,7 @@ namespace Stockfish::Eval::NNUE { out[i] = read_little_endian(stream); } + // write_little_endian(s, values, N) : write integers in bulk to a little indian stream. // This takes N integers from array values and writes them on stream s. template @@ -162,77 +166,119 @@ namespace Stockfish::Eval::NNUE { write_little_endian(stream, values[i]); } + + // read_leb_128(s, out, N) : read N signed integers from the stream s, putting them in + // the array out. The stream is assumed to be compressed using the signed LEB128 format. + // See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme. template inline void read_leb_128(std::istream& stream, IntType* out, std::size_t count) { - static_assert(std::is_signed_v, "Not implemented for unsigned types"); + + // Check the presence of our LEB128 magic string char leb128MagicString[Leb128MagicStringSize]; stream.read(leb128MagicString, Leb128MagicStringSize); assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0); + + static_assert(std::is_signed_v, "Not implemented for unsigned types"); + const std::uint32_t BUF_SIZE = 4096; std::uint8_t buf[BUF_SIZE]; + auto bytes_left = read_little_endian(stream); + std::uint32_t buf_pos = BUF_SIZE; - for (std::size_t i = 0; i < count; ++i) { + for (std::size_t i = 0; i < count; ++i) + { IntType result = 0; size_t shift = 0; - do { - if (buf_pos == BUF_SIZE) { + do + { + if (buf_pos == BUF_SIZE) + { stream.read(reinterpret_cast(buf), std::min(bytes_left, BUF_SIZE)); buf_pos = 0; } + std::uint8_t byte = buf[buf_pos++]; --bytes_left; result |= (byte & 0x7f) << shift; shift += 7; - if ((byte & 0x80) == 0) { - out[i] = sizeof(IntType) * 8 <= shift || (byte & 0x40) == 0 ? result : result | ~((1 << shift) - 1); + + if ((byte & 0x80) == 0) + { + out[i] = (sizeof(IntType) * 8 <= shift || (byte & 0x40) == 0) ? result + : result | ~((1 << shift) - 1); break; } - } while (shift < sizeof(IntType) * 8); + } + while (shift < sizeof(IntType) * 8); } + assert(bytes_left == 0); } + + // write_leb_128(s, values, N) : write signed integers to a stream with LEB128 compression. + // This takes N integers from array values, compress them with the LEB128 algorithm and + // writes the result on the stream s. + // See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme. template inline void write_leb_128(std::ostream& stream, const IntType* values, std::size_t count) { - static_assert(std::is_signed_v, "Not implemented for unsigned types"); + + // Write our LEB128 magic string stream.write(Leb128MagicString, Leb128MagicStringSize); + + static_assert(std::is_signed_v, "Not implemented for unsigned types"); + std::uint32_t byte_count = 0; - for (std::size_t i = 0; i < count; ++i) { + for (std::size_t i = 0; i < count; ++i) + { IntType value = values[i]; std::uint8_t byte; - do { + do + { byte = value & 0x7f; value >>= 7; ++byte_count; - } while ((byte & 0x40) == 0 ? value != 0 : value != -1); + } + while ((byte & 0x40) == 0 ? value != 0 : value != -1); } + write_little_endian(stream, byte_count); + const std::uint32_t BUF_SIZE = 4096; std::uint8_t buf[BUF_SIZE]; std::uint32_t buf_pos = 0; + auto flush = [&]() { - if (buf_pos > 0) { + if (buf_pos > 0) + { stream.write(reinterpret_cast(buf), buf_pos); buf_pos = 0; } }; + auto write = [&](std::uint8_t byte) { buf[buf_pos++] = byte; - if (buf_pos == BUF_SIZE) flush(); + if (buf_pos == BUF_SIZE) + flush(); }; - for (std::size_t i = 0; i < count; ++i) { + + for (std::size_t i = 0; i < count; ++i) + { IntType value = values[i]; - while (true) { + while (true) + { std::uint8_t byte = value & 0x7f; value >>= 7; - if ((byte & 0x40) == 0 ? value == 0 : value == -1) { + if ((byte & 0x40) == 0 ? value == 0 : value == -1) + { write(byte); break; } write(byte | 0x80); } } + flush(); }