/**
 * Encode a Unicode code point as a NUL-terminated UTF-8 sequence.
 *
 * Only the magnitude of the value is checked: surrogates and values between
 * 0x110000 and 0x1FFFFF are encoded like any other 16- or 21-bit value.
 *
 * @param cp  code point to encode
 * @param buf output buffer; must have room for at least 5 bytes
 *            (up to 4 encoded bytes plus the terminating NUL)
 * @return buf on success, or NULL if cp does not fit in 21 bits
 *         (buf is not written in that case)
 */
static char *utf8_cp (uint_fast32_t cp, char *buf)
{
    if (cp < (1 << 7))
    {
        /* 0xxxxxxx */
        buf[0] = (char)cp;
        buf[1] = 0;
    }
    else if (cp < (1 << 11))
    {
        /* 110xxxxx 10xxxxxx */
        buf[0] = (char)(0xC0 | (cp >> 6));
        buf[1] = (char)(0x80 | (cp & 0x3F));
        buf[2] = 0;
    }
    else if (cp < (1 << 16))
    {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        buf[0] = (char)(0xE0 | (cp >> 12));
        buf[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
        buf[2] = (char)(0x80 | (cp & 0x3F));
        buf[3] = 0;
    }
    else if (cp < (1 << 21))
    {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
         * The lead byte prefix for a four-byte sequence is 0xF0; 0xE0 would
         * announce a three-byte sequence and corrupt the output. */
        buf[0] = (char)(0xF0 | (cp >> 18));
        buf[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
        buf[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
        buf[3] = (char)(0x80 | (cp & 0x3F));
        buf[4] = 0;
    }
    else
        return NULL;
    return buf;
}
+
/**
 * Decode the first UTF-8 sequence of a NUL-terminated string.
 *
 * Sequences of one to four bytes are accepted. Every trailing byte must be a
 * continuation byte (10xxxxxx); overlong encodings and surrogates are not
 * rejected.
 *
 * @param utf8 NUL-terminated string to decode from
 * @return the decoded code point, or 0 if the string is empty, truncated,
 *         or does not start with a well-formed sequence
 */
static uint_fast32_t cp_utf8 (const char *utf8)
{
    /* Work on unsigned bytes: plain char may be signed. */
    const unsigned char *s = (const unsigned char *)utf8;
    const unsigned char lead = s[0];
    uint_fast32_t cp;
    unsigned trail;

    if (lead < 0x80) /* ASCII (7 bits), including the terminating NUL */
        return lead;
    if (lead < 0xC0) /* stray continuation byte */
        return 0;

    if (lead < 0xE0) /* two bytes (11 bits) */
    {
        cp = lead & 0x1F;
        trail = 1;
    }
    else if (lead < 0xF0) /* three bytes (16 bits) */
    {
        cp = lead & 0x0F;
        trail = 2;
    }
    else if (lead < 0xF8) /* four bytes (21 bits) */
    {
        cp = lead & 0x07;
        trail = 3;
    }
    else /* not a valid lead byte */
        return 0;

    for (unsigned i = 1; i <= trail; i++)
    {
        /* A NUL terminator is not a continuation byte, so this check also
         * stops at the end of a truncated string before reading past it. */
        if ((s[i] & 0xC0) != 0x80)
            return 0;
        cp = (cp << 6) | (s[i] & 0x3F);
    }
    return cp;
}
+