+/*
+ * Expands to 16 comma-separated 64-bit initializers that share the lower
+ * 32 bits (v) and enumerate every 4-bit pattern in the upper 32 bits.
+ * For entry index i (0-15): bit 3 of i sets bit 32 of the constant, bit 2
+ * sets bit 40, bit 1 sets bit 48 and bit 0 sets bit 56, i.e. each selected
+ * byte of the upper half holds 0x01. The constant is passed through
+ * AV_LE2NE64C and the result is shifted left by plane.
+ * NOTE(review): AV_LE2NE64C is defined outside this chunk; presumably it
+ * converts a little-endian constant to native byte order -- confirm.
+ * Parameters and entries are parenthesized so that expression arguments
+ * cannot regroup with the surrounding | and << operators.
+ */
+#define LUT8_PART(plane, v) \
+    (AV_LE2NE64C(UINT64_C(0x0000000) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x1000000) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x0010000) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x1010000) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x0000100) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x1000100) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x0010100) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x1010100) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x0000001) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x1000001) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x0010001) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x1010001) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x0000101) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x1000101) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x0010101) << 32 | (v)) << (plane)), \
+    (AV_LE2NE64C(UINT64_C(0x1010101) << 32 | (v)) << (plane))
+
+/*
+ * Brace-enclosed initializer for one 256-entry row: 16 LUT8_PART groups of
+ * 16 entries each. The second argument of each group is the pattern for the
+ * lower 32 bits; the groups step through the same 0x1000000 / 0x0010000 /
+ * 0x0000100 / 0x0000001 combinations that LUT8_PART applies to the upper
+ * 32 bits, so the group number supplies the high nibble of the row index.
+ */
+#define LUT8(plane) {                                           \
+    LUT8_PART(plane, 0x0000000), LUT8_PART(plane, 0x1000000),   \
+    LUT8_PART(plane, 0x0010000), LUT8_PART(plane, 0x1010000),   \
+    LUT8_PART(plane, 0x0000100), LUT8_PART(plane, 0x1000100),   \
+    LUT8_PART(plane, 0x0010100), LUT8_PART(plane, 0x1010100),   \
+    LUT8_PART(plane, 0x0000001), LUT8_PART(plane, 0x1000001),   \
+    LUT8_PART(plane, 0x0010001), LUT8_PART(plane, 0x1010001),   \
+    LUT8_PART(plane, 0x0000101), LUT8_PART(plane, 0x1000101),   \
+    LUT8_PART(plane, 0x0010101), LUT8_PART(plane, 0x1010101)    \
+}
+
+// One 256-entry row per plane (0-7); the second index is an 8-bit mask and
+// each row is generated by LUT8 with the plane number as shift amount.
+static const uint64_t plane8_lut[8][256] = {
+    [0] = LUT8(0),
+    [1] = LUT8(1),
+    [2] = LUT8(2),
+    [3] = LUT8(3),
+    [4] = LUT8(4),
+    [5] = LUT8(5),
+    [6] = LUT8(6),
+    [7] = LUT8(7),
+};
+
+/*
+ * Brace-enclosed initializer of 16 rows x 4 entries. Row r (0-15) holds
+ * 1U << plane in column c (0-3) when bit (3 - c) of r is set, and 0
+ * otherwise. The shifted constant is unsigned on purpose: with a plain int
+ * constant, LUT32(31) would evaluate 1 << 31, which overflows a 32-bit int
+ * and is undefined behaviour.
+ */
+#define LUT32(plane) { \
+    0,              0,              0,              0,              \
+    0,              0,              0,              1U << (plane),  \
+    0,              0,              1U << (plane),  0,              \
+    0,              0,              1U << (plane),  1U << (plane),  \
+    0,              1U << (plane),  0,              0,              \
+    0,              1U << (plane),  0,              1U << (plane),  \
+    0,              1U << (plane),  1U << (plane),  0,              \
+    0,              1U << (plane),  1U << (plane),  1U << (plane),  \
+    1U << (plane),  0,              0,              0,              \
+    1U << (plane),  0,              0,              1U << (plane),  \
+    1U << (plane),  0,              1U << (plane),  0,              \
+    1U << (plane),  0,              1U << (plane),  1U << (plane),  \
+    1U << (plane),  1U << (plane),  0,              0,              \
+    1U << (plane),  1U << (plane),  0,              1U << (plane),  \
+    1U << (plane),  1U << (plane),  1U << (plane),  0,              \
+    1U << (plane),  1U << (plane),  1U << (plane),  1U << (plane),  \
+}
+
+// One row per plane (0-31). Each row holds 16 groups of 4 entries, one group
+// for every 4-bit mask value, as generated by LUT32.
+static const uint32_t plane32_lut[32][16 * 4] = {
+    [ 0] = LUT32( 0), [ 1] = LUT32( 1), [ 2] = LUT32( 2), [ 3] = LUT32( 3),
+    [ 4] = LUT32( 4), [ 5] = LUT32( 5), [ 6] = LUT32( 6), [ 7] = LUT32( 7),
+    [ 8] = LUT32( 8), [ 9] = LUT32( 9), [10] = LUT32(10), [11] = LUT32(11),
+    [12] = LUT32(12), [13] = LUT32(13), [14] = LUT32(14), [15] = LUT32(15),
+    [16] = LUT32(16), [17] = LUT32(17), [18] = LUT32(18), [19] = LUT32(19),
+    [20] = LUT32(20), [21] = LUT32(21), [22] = LUT32(22), [23] = LUT32(23),
+    [24] = LUT32(24), [25] = LUT32(25), [26] = LUT32(26), [27] = LUT32(27),
+    [28] = LUT32(28), [29] = LUT32(29), [30] = LUT32(30), [31] = LUT32(31),
+};
+
+// Gray to RGB: copies x into bits 16-23, 8-15 and 0-7 of the result. Needed
+// for the palette table of grayscale images with bpp < 8. The three copies
+// only stay separate when x fits in 8 bits; wider values overlap.
+static av_always_inline uint32_t gray2rgb(const uint32_t x) {
+    const uint32_t high = x << 16;
+    const uint32_t mid  = x << 8;
+    return high | mid | x;
+}
+