From 9ae7d4e604370f2dec236494c48bc33b685a014b Mon Sep 17 00:00:00 2001
From: =?utf8?q?R=C3=A9mi=20Denis-Courmont?=
Date: Sun, 7 Feb 2010 11:40:52 +0200
Subject: [PATCH] IsUTF8: reject surrogates and non-Unicode code points

---
Review notes (ignored by git-am):
 - The surrogate range test used an upper bound of 0xC000, which is below
   0xD800, so the condition could never be true. It is now 0xE000, i.e.
   U+D800..U+DFFF are rejected.
 - The generic overlong test used 1u << (5 * charlen - 3), which yields
   0x1000 / 0x20000 for 3- / 4-byte sequences and therefore rejected valid
   code points U+0800..U+0FFF and U+10000..U+1FFFF. It now uses
   5 * charlen - 4 (0x800 / 0x10000); the 2-byte case stays covered by the
   explicit cp < 128 test.
 - The switch relies on case fall-through; this is now stated in comments.
 - Leading whitespace of the hunk lines was reconstructed from a
   whitespace-collapsed copy; verify against the tree before applying.

 src/text/unicode.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/text/unicode.c b/src/text/unicode.c
index 1c0501a1db..77b7684d4a 100644
--- a/src/text/unicode.c
+++ b/src/text/unicode.c
@@ -304,7 +304,7 @@ static char *CheckUTF8( char *str, char rep )
                 goto error;
         }
 
-        assert (charlen >= 2);
+        assert (charlen >= 2 && charlen <= 4);
 
         uint32_t cp = c & ~((0xff >> (7 - charlen)) << (7 - charlen));
         for (int i = 1; i < charlen; i++)
@@ -318,11 +318,20 @@ static char *CheckUTF8( char *str, char rep )
             cp = (cp << 6) | (ptr[i] & 0x3f);
         }
 
-        if (cp < 128) // overlong (special case for ASCII)
-            goto error;
-        if (cp < (1u << (5 * charlen - 3))) // overlong
-            goto error;
-
+        switch (charlen)
+        {
+            case 4:
+                if (cp > 0x10FFFF) // beyond Unicode
+                    goto error;
+            case 3: // also reached from case 4 (intentional fall-through)
+                if (cp >= 0xD800 && cp < 0xE000) // UTF-16 surrogate
+                    goto error;
+            case 2: // also reached from cases 3 and 4 (intentional fall-through)
+                if (cp < 128) // ASCII overlong (2-byte minimum is U+0080)
+                    goto error;
+                if (cp < (1u << (5 * charlen - 4))) // overlong: < 0x800 (3 bytes), < 0x10000 (4 bytes)
+                    goto error;
+        }
         ptr += charlen;
         continue;
 
-- 
2.39.2