json: Reject invalid UTF-8 sequences

We reject bytes that can't occur in valid UTF-8 (\xC0..\xC1, \xF5..\xFF) in the lexer. That's insufficient; there's plenty of invalid UTF-8 not containing these bytes, as demonstrated by check-qjson:

* Malformed sequences
  - Unexpected continuation bytes
  - Missing continuation bytes after start bytes other than \xC0..\xC1, \xF5..\xFD.
* Overlong sequences with start bytes other than \xC0..\xC1, \xF5..\xFD.
* Invalid code points

Fixing this in the lexer would be bothersome. Fixing it in the parser is straightforward, so do that.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-23-armbru@redhat.com>

commit: e59f39d40397645477b959255aedfa17a7c9c779 [log] [tgz]
author: Markus Armbruster <armbru@redhat.com> Thu Aug 23 18:39:49 2018 +0200
committer: Markus Armbruster <armbru@redhat.com> Fri Aug 24 20:26:37 2018 +0200
tree: 9d1bbd816632aa79e9015ddd47eb7a827b1787f0
parent: a89d3104a29c400dfed4b675d6385a17223f9e0f [diff] [blame]
diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index 164b867..0e232ff 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c

@@ -13,6 +13,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
+#include "qemu/unicode.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "qapi/qmp/qbool.h"
@@ -133,6 +134,10 @@
     const char *ptr = token->str;
     QString *str;
     char quote;
+    int cp;
+    char *end;
+    ssize_t len;
+    char utf8_buf[5];
 
     assert(*ptr == '"' || *ptr == '\'');
     quote = *ptr++;
@@ -194,12 +199,15 @@
                 goto out;
             }
         } else {
-            char dummy[2];
-
-            dummy[0] = *ptr++;
-            dummy[1] = 0;
-
-            qstring_append(str, dummy);
+            cp = mod_utf8_codepoint(ptr, 6, &end);
+            if (cp <= 0) {
+                parse_error(ctxt, token, "invalid UTF-8 sequence in string");
+                goto out;
+            }
+            ptr = end;
+            len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
+            assert(len >= 0);
+            qstring_append(str, utf8_buf);
         }
     }
commit	e59f39d40397645477b959255aedfa17a7c9c779	[log] [tgz]
author	Markus Armbruster <armbru@redhat.com>	Thu Aug 23 18:39:49 2018 +0200
committer	Markus Armbruster <armbru@redhat.com>	Fri Aug 24 20:26:37 2018 +0200
tree	9d1bbd816632aa79e9015ddd47eb7a827b1787f0
parent	a89d3104a29c400dfed4b675d6385a17223f9e0f [diff] [blame]