blob: ca6d1e80f84d7ecb20a7f94cf099cbb586699748 [file] [log] [blame]
Nigel Taod0b16cb2020-03-14 10:15:54 +11001// Copyright 2020 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// ----------------
16
17/*
18jsonfindptrs reads UTF-8 JSON from stdin and writes every node's JSON Pointer
19(RFC 6901) to stdout.
20
Nigel Taod60815c2020-03-26 14:32:35 +110021See the "const char* g_usage" string below for details.
Nigel Taod0b16cb2020-03-14 10:15:54 +110022
23----
24
25This program uses Wuffs' JSON decoder at a relatively high level, building
26in-memory representations of JSON 'things' (e.g. numbers, strings, objects).
27After the entire input has been converted, walking the tree prints the output
Nigel Taocf6c5782020-08-03 23:43:45 +100028(in sorted order). The wuffs_aux::DecodeJson library function converts the
29lower level token stream to higher level callbacks. This .cc file deals only
30with those callbacks, not with tokens per se.
Nigel Taod0b16cb2020-03-14 10:15:54 +110031
32This approach is centered around JSON things. Each JSON thing comprises one or
33more JSON tokens.
34
35An alternative, lower-level approach is in the sibling example/jsonptr program.
36Neither approach is better or worse per se, but when studying this program, be
37aware that there are multiple ways to use Wuffs' JSON decoder.
38
39The two programs, jsonfindptrs and jsonptr, also demonstrate different
40trade-offs with regard to JSON object duplicate keys. The JSON spec permits
41different implementations to allow or reject duplicate keys. It is not always
42clear which approach is safer. Rejecting them is certainly unambiguous, and
43security bugs can lurk in ambiguous corners of a file format, if two different
44implementations both silently accept a file but differ on how to interpret it.
45On the other hand, in the worst case, detecting duplicate keys requires O(N)
46memory, where N is the size of the (potentially untrusted) input.
47
48This program (jsonfindptrs) rejects duplicate keys.
49
50----
51
Nigel Tao50bfab92020-08-05 11:39:09 +100052To run:
Nigel Taod0b16cb2020-03-14 10:15:54 +110053
54$CXX jsonfindptrs.cc && ./a.out < ../../test/data/github-tags.json; rm -f a.out
55
56for a C++ compiler $CXX, such as clang++ or g++.
57*/
58
Nigel Tao5396dbd2020-08-29 22:02:35 +100059#if defined(__cplusplus) && (__cplusplus < 201703L)
60#error "This C++ program requires -std=c++17 or later"
Nigel Tao721190a2020-04-03 22:25:21 +110061#endif
62
Nigel Taocf6c5782020-08-03 23:43:45 +100063#include <stdio.h>
Nigel Tao6b7ce302020-07-07 16:19:46 +100064
Nigel Taod0b16cb2020-03-14 10:15:54 +110065#include <iostream>
66#include <map>
67#include <string>
68#include <vector>
69
Nigel Tao5396dbd2020-08-29 22:02:35 +100070// <variant> requires C++17.
71#include <variant>
72
Nigel Taod0b16cb2020-03-14 10:15:54 +110073// Wuffs ships as a "single file C library" or "header file library" as per
74// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
75//
76// To use that single file as a "foo.c"-like implementation, instead of a
77// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
78// compiling it.
79#define WUFFS_IMPLEMENTATION
80
Nigel Tao7f9f37c2021-10-04 12:35:32 +110081// Defining the WUFFS_CONFIG__STATIC_FUNCTIONS macro is optional, but when
82// combined with WUFFS_IMPLEMENTATION, it demonstrates making all of Wuffs'
83// functions have static storage.
84//
85// This can help the compiler ignore or discard unused code, which can produce
86// faster compiles and smaller binaries. Other motivations are discussed in the
87// "ALLOW STATIC IMPLEMENTATION" section of
88// https://raw.githubusercontent.com/nothings/stb/master/docs/stb_howto.txt
89#define WUFFS_CONFIG__STATIC_FUNCTIONS
90
// Defining the WUFFS_CONFIG__MODULE* macros is optional, but it lets users of
// release/c/etc.c choose which parts of Wuffs to build. That file contains the
// entire Wuffs standard library, implementing a variety of codecs and file
// formats. Without this macro definition, an optimizing compiler or linker may
// very well discard Wuffs code for unused codecs, but listing the Wuffs
// modules we use makes that process explicit. Preprocessing means that such
// code simply isn't compiled.
98#define WUFFS_CONFIG__MODULES
Nigel Taocf6c5782020-08-03 23:43:45 +100099#define WUFFS_CONFIG__MODULE__AUX__BASE
100#define WUFFS_CONFIG__MODULE__AUX__JSON
Nigel Taod0b16cb2020-03-14 10:15:54 +1100101#define WUFFS_CONFIG__MODULE__BASE
102#define WUFFS_CONFIG__MODULE__JSON
103
104// If building this program in an environment that doesn't easily accommodate
105// relative includes, you can use the script/inline-c-relative-includes.go
106// program to generate a stand-alone C++ file.
107#include "../../release/c/wuffs-unsupported-snapshot.c"
108
// TRY evaluates error_msg (an expression convertible to std::string) exactly
// once. If the resulting string is non-empty, it is returned from the
// enclosing function; otherwise execution continues after the macro.
#define TRY(error_msg)                           \
  do {                                           \
    if (std::string z = error_msg; !z.empty()) { \
      return z;                                  \
    }                                            \
  } while (false)
116
// g_usage is the help text. parse_flags and main1 return it as their error
// message when the command line is malformed (an unrecognized flag, an invalid
// -max-output-depth value or more than one input filename).
//
// When WUFFS_EXAMPLE_SPEAK_JWCC_NOT_JSON is defined at compile time, the text
// gains one extra paragraph saying that -jwcc is always in effect.
static const char* g_usage =
    "Usage: jsonfindptrs -flags input.json\n"
    "\n"
    "Flags:\n"
    " -d=NUM -max-output-depth=NUM\n"
    " -q=STR -query=STR\n"
    " -input-allow-comments\n"
    " -input-allow-extra-comma\n"
    " -input-allow-inf-nan-numbers\n"
    " -input-jwcc\n"
    " -jwcc\n"
    " -only-parse-dont-output\n"
    " -strict-json-pointer-syntax\n"
    "\n"
    "The input.json filename is optional. If absent, it reads from stdin.\n"
    "\n"
    "----\n"
    "\n"
    "jsonfindptrs reads UTF-8 JSON from stdin and writes every node's JSON\n"
    "Pointer (RFC 6901) to stdout.\n"
    "\n"
    "For example, given RFC 6901 section 5's sample input\n"
    "(https://tools.ietf.org/rfc/rfc6901.txt), this command:\n"
    " jsonfindptrs rfc-6901-json-pointer.json\n"
    "will print:\n"
    " \n"
    " /\n"
    " / \n"
    " /a~1b\n"
    " /c%d\n"
    " /e^f\n"
    " /foo\n"
    " /foo/0\n"
    " /foo/1\n"
    " /g|h\n"
    " /i\\j\n"
    " /k\"l\n"
    " /m~0n\n"
    "\n"
    "The first three lines are (1) a 0-byte \"\", (2) a 1-byte \"/\" and (3)\n"
    "a 2-byte \"/ \". Unlike a file system, the \"/\" JSON Pointer does not\n"
    "identify the root. Instead, \"\" is the root and \"/\" is the child (the\n"
    "value in a key-value pair) of the root whose key is the empty string.\n"
    "Similarly, \"/xyz\" and \"/xyz/\" are two different nodes.\n"
    "\n"
    "----\n"
    "\n"
    "The JSON specification (https://json.org/) permits implementations that\n"
    "allow duplicate keys, but this one does not. Conversely, it prints keys\n"
    "in sorted order, but the overall output is not necessarily sorted\n"
    "lexicographically. For example, \"/a/9\" would come before \"/a/10\",\n"
    "and \"/b/c\", a child of \"/b\", would come before \"/b+\".\n"
    "\n"
    "This JSON implementation also rejects integer values outside ±M, where\n"
    "M is ((1<<53)-1), also known as JavaScript's Number.MAX_SAFE_INTEGER.\n"
    "\n"
    "The -input-allow-comments flag allows \"/*slash-star*/\" and\n"
    "\"//slash-slash\" C-style comments within JSON input.\n"
    "\n"
    "The -input-allow-extra-comma flag allows input like \"[1,2,]\", with a\n"
    "comma after the final element of a JSON list or dictionary.\n"
    "\n"
    "The -input-allow-inf-nan-numbers flag allows non-finite floating point\n"
    "numbers (infinities and not-a-numbers) within JSON input.\n"
    "\n"
    "Combining some of those flags results in speaking JWCC (JSON With Commas\n"
    "and Comments), not plain JSON. For convenience, the -input-jwcc or -jwcc\n"
    "flags enables all of:\n"
    " -input-allow-comments\n"
    " -input-allow-extra-comma\n"
    "\n"
#if defined(WUFFS_EXAMPLE_SPEAK_JWCC_NOT_JSON)
    "This program was configured at compile time to always use -jwcc.\n"
    "\n"
#endif
    "----\n"
    "\n"
    "The -only-parse-dont-output flag means to write nothing to stdout. An\n"
    "error message will still be written to stderr if the input is invalid.\n"
    "\n"
    "----\n"
    "\n"
    "The -strict-json-pointer-syntax flag restricts the output lines to\n"
    "exactly RFC 6901, with only two escape sequences: \"~0\" and \"~1\" for\n"
    "\"~\" and \"/\". Without this flag, this program also lets \"~n\",\n"
    "\"~r\" and \"~t\" escape the New Line, Carriage Return and Horizontal\n"
    "Tab ASCII control characters, which can work better with line oriented\n"
    "(and tab separated) Unix tools that assume exactly one record (e.g. one\n"
    "JSON Pointer string) per line. With this flag, it fails if the input\n"
    "JSON's keys contain \"\\u0009\", \"\\u000A\" or \"\\u000D\".\n"
    "\n"
    "----\n"
    "\n"
    "The JSON specification permits implementations to set their own maximum\n"
    "input depth. This JSON implementation sets it to 1024.\n"
    "\n"
    "The -d=NUM or -max-output-depth=NUM flag gives the maximum (inclusive)\n"
    "output depth. JSON containers ([] arrays and {} objects) can hold other\n"
    "containers. A bare -d or -max-output-depth is equivalent to -d=1,\n"
    "analogous to the Unix ls command. The flag's absence is equivalent to an\n"
    "unlimited output depth, analogous to the Unix find command (and hence\n"
    "the name of this program: jsonfindptrs).";
219
220// ----
221
// g_quirks lists the Wuffs JSON decoder quirks to enable. parse_flags and
// main1 append to it, and main1 hands its contents to wuffs_aux::DecodeJson.
std::vector<uint32_t> g_quirks;

// g_dst is the JSON Pointer text of the node currently being printed.
// print_json_pointers appends to it while descending into a container and
// truncates it again afterwards.
std::string g_dst;

// g_to_string_cache[i] caches the result of std::to_string(i).
std::vector<std::string> g_to_string_cache;
228
// g_flags holds the parsed command line. It starts out zero-initialized and
// is filled in by parse_flags.
struct {
  // The arguments left over after the leading flags: argc minus (and argv
  // plus) the number of arguments that parse_flags consumed.
  int remaining_argc;
  char** remaining_argv;

  // True if -only-parse-dont-output was passed.
  bool only_parse_dont_output;
  // True if -strict-json-pointer-syntax was passed.
  bool strict_json_pointer_syntax;

  // From -d / -max-output-depth. parse_flags resets it to 0xFFFFFFFF before
  // looking at any flags.
  uint32_t max_output_depth;

  // From -q=STR / -query=STR: points at STR inside argv. Stays null if the
  // flag is absent.
  char* query_c_string;
} g_flags = {0};
Nigel Taod0b16cb2020-03-14 10:15:54 +1100240
241std::string //
242parse_flags(int argc, char** argv) {
Nigel Taod60815c2020-03-26 14:32:35 +1100243 g_flags.max_output_depth = 0xFFFFFFFF;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100244
Nigel Tao04126792021-02-22 12:23:57 +1100245#if defined(WUFFS_EXAMPLE_SPEAK_JWCC_NOT_JSON)
246 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
247 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
248 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
249#endif
250
Nigel Taod0b16cb2020-03-14 10:15:54 +1100251 int c = (argc > 0) ? 1 : 0; // Skip argv[0], the program name.
252 for (; c < argc; c++) {
253 char* arg = argv[c];
254 if (*arg++ != '-') {
255 break;
256 }
257
258 // A double-dash "--foo" is equivalent to a single-dash "-foo". As special
259 // cases, a bare "-" is not a flag (some programs may interpret it as
260 // stdin) and a bare "--" means to stop parsing flags.
261 if (*arg == '\x00') {
262 break;
263 } else if (*arg == '-') {
264 arg++;
265 if (*arg == '\x00') {
266 c++;
267 break;
268 }
269 }
270
Nigel Tao94440cf2020-04-02 22:28:24 +1100271 if (!strcmp(arg, "d") || !strcmp(arg, "max-output-depth")) {
Nigel Taod60815c2020-03-26 14:32:35 +1100272 g_flags.max_output_depth = 1;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100273 continue;
Nigel Tao94440cf2020-04-02 22:28:24 +1100274 } else if (!strncmp(arg, "d=", 2) ||
Nigel Taod0b16cb2020-03-14 10:15:54 +1100275 !strncmp(arg, "max-output-depth=", 16)) {
276 while (*arg++ != '=') {
277 }
278 wuffs_base__result_u64 u = wuffs_base__parse_number_u64(
Nigel Tao6b7ce302020-07-07 16:19:46 +1000279 wuffs_base__make_slice_u8((uint8_t*)arg, strlen(arg)),
280 WUFFS_BASE__PARSE_NUMBER_XXX__DEFAULT_OPTIONS);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100281 if (wuffs_base__status__is_ok(&u.status) && (u.value <= 0xFFFFFFFF)) {
Nigel Taod60815c2020-03-26 14:32:35 +1100282 g_flags.max_output_depth = (uint32_t)(u.value);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100283 continue;
284 }
Nigel Taod60815c2020-03-26 14:32:35 +1100285 return g_usage;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100286 }
Nigel Tao0a5940d2020-08-07 13:15:41 +1000287 if (!strcmp(arg, "input-allow-comments")) {
288 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
289 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
290 continue;
291 }
292 if (!strcmp(arg, "input-allow-extra-comma")) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000293 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
Nigel Taoc766bb72020-07-09 12:59:32 +1000294 continue;
295 }
Nigel Tao0a5940d2020-08-07 13:15:41 +1000296 if (!strcmp(arg, "input-allow-inf-nan-numbers")) {
297 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS);
298 continue;
299 }
Nigel Tao04126792021-02-22 12:23:57 +1100300 if (!strcmp(arg, "input-jwcc") || !strcmp(arg, "jwcc")) {
301 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
302 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
303 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
304 continue;
305 }
Nigel Tao8aac6762020-08-12 22:47:45 +1000306 if (!strncmp(arg, "q=", 2) || !strncmp(arg, "query=", 6)) {
307 while (*arg++ != '=') {
308 }
309 g_flags.query_c_string = arg;
310 continue;
311 }
Nigel Tao8d779ae2020-08-29 23:10:08 +1000312 if (!strcmp(arg, "only-parse-dont-output")) {
313 g_flags.only_parse_dont_output = true;
314 continue;
315 }
Nigel Taoecadf722020-07-13 08:22:34 +1000316 if (!strcmp(arg, "strict-json-pointer-syntax")) {
Nigel Taod60815c2020-03-26 14:32:35 +1100317 g_flags.strict_json_pointer_syntax = true;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100318 continue;
319 }
320
Nigel Taod60815c2020-03-26 14:32:35 +1100321 return g_usage;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100322 }
323
Nigel Taod60815c2020-03-26 14:32:35 +1100324 g_flags.remaining_argc = argc - c;
325 g_flags.remaining_argv = argv + c;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100326 return "";
327}
328
Nigel Tao6b7ce302020-07-07 16:19:46 +1000329// ----
Nigel Taod0b16cb2020-03-14 10:15:54 +1100330
struct JsonValue;

// A JSON array: an ordered sequence of values.
using JsonVector = std::vector<JsonValue>;

// We use a std::map in this example program to avoid dependencies outside of
// the C++ standard library. If you're copy/pasting this JsonValue code,
// consider a more efficient data structure such as an absl::btree_map.
//
// See CppCon 2014: Chandler Carruth "Efficiency with Algorithms, Performance
// with Data Structures" at https://www.youtube.com/watch?v=fHNmRkzxHWs
using JsonMap = std::map<std::string, JsonValue>;

// The alternatives are, in order: null (std::monostate), boolean, integer,
// floating point number, string, array and object.
using JsonVariant = std::variant<std::monostate,
                                 bool,
                                 int64_t,
                                 double,
                                 std::string,
                                 JsonVector,
                                 JsonMap>;

// JsonValue is an in-memory JSON 'thing'. It is a JsonVariant with one
// constructor per alternative.
struct JsonValue : JsonVariant {
  // Constructs a JSON null.
  JsonValue() : JsonVariant() {}
  JsonValue(bool x) : JsonVariant(x) {}
  JsonValue(int64_t x) : JsonVariant(x) {}
  JsonValue(double x) : JsonVariant(x) {}
  // x is a named rvalue reference and therefore an lvalue here, so it must be
  // std::move'd for the variant to take over its buffer instead of copying it.
  JsonValue(std::string&& x) : JsonVariant(std::move(x)) {}
  // The pointer arguments are tags that only select an overload: they are
  // never dereferenced and the resulting container is always empty.
  JsonValue(JsonVector* ignored) : JsonVariant(JsonVector()) {}
  JsonValue(JsonMap* ignored) : JsonVariant(JsonMap()) {}
};
360
Nigel Taod0b16cb2020-03-14 10:15:54 +1100361// ----
362
// escape_needed returns whether s contains any of the five bytes that the
// escape function rewrites: '~', '/', '\n', '\r' or '\t'.
bool  //
escape_needed(const std::string& s) {
  return s.find_first_of("~/\n\r\t") != std::string::npos;
}
Nigel Taod0b16cb2020-03-14 10:15:54 +1100372
Nigel Tao9d699392020-08-29 10:56:37 +1000373std::string //
374escape(const std::string& s) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100375 std::string e;
376 e.reserve(8 + s.length());
Nigel Tao9d699392020-08-29 10:56:37 +1000377 for (const char& c : s) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100378 switch (c) {
379 case '~':
380 e += "~0";
381 break;
382 case '/':
383 e += "~1";
384 break;
385 case '\n':
Nigel Taod60815c2020-03-26 14:32:35 +1100386 if (g_flags.strict_json_pointer_syntax) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100387 return "";
388 }
389 e += "~n";
390 break;
391 case '\r':
Nigel Taod60815c2020-03-26 14:32:35 +1100392 if (g_flags.strict_json_pointer_syntax) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100393 return "";
394 }
395 e += "~r";
396 break;
Nigel Tao904004e2020-11-15 20:56:04 +1100397 case '\t':
398 if (g_flags.strict_json_pointer_syntax) {
399 return "";
400 }
401 e += "~t";
402 break;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100403 default:
404 e += c;
405 break;
406 }
407 }
408 return e;
409}
410
411std::string //
Nigel Tao5396dbd2020-08-29 22:02:35 +1000412print_json_pointers(JsonValue& jvalue, uint32_t depth) {
Nigel Tao8098d962020-08-29 10:41:05 +1000413 std::cout << g_dst << '\n';
Nigel Taod60815c2020-03-26 14:32:35 +1100414 if (depth++ >= g_flags.max_output_depth) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100415 return "";
416 }
417
Nigel Tao8098d962020-08-29 10:41:05 +1000418 size_t n = g_dst.size();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000419 if (std::holds_alternative<JsonVector>(jvalue)) {
420 JsonVector& jvector = std::get<JsonVector>(jvalue);
421 g_dst += "/";
422 for (size_t i = 0; i < jvector.size(); i++) {
423 if (i >= g_to_string_cache.size()) {
424 g_to_string_cache.push_back(std::to_string(i));
Nigel Taod0b16cb2020-03-14 10:15:54 +1100425 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000426 g_dst += g_to_string_cache[i];
427 TRY(print_json_pointers(jvector[i], depth));
428 g_dst.resize(n + 1);
429 }
430 g_dst.resize(n);
431 } else if (std::holds_alternative<JsonMap>(jvalue)) {
432 g_dst += "/";
433 for (auto& kv : std::get<JsonMap>(jvalue)) {
434 if (!escape_needed(kv.first)) {
435 g_dst += kv.first;
436 } else {
437 std::string e = escape(kv.first);
438 if (e.empty()) {
Nigel Tao904004e2020-11-15 20:56:04 +1100439 return "main: unsupported \"\\u0009\", \"\\u000A\" or \"\\u000D\" in "
440 "object key";
Nigel Taod0b16cb2020-03-14 10:15:54 +1100441 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000442 g_dst += e;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100443 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000444 TRY(print_json_pointers(kv.second, depth));
445 g_dst.resize(n + 1);
446 }
447 g_dst.resize(n);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100448 }
449 return "";
450}
451
Nigel Taocf6c5782020-08-03 23:43:45 +1000452// ----
453
454class Callbacks : public wuffs_aux::DecodeJsonCallbacks {
455 public:
456 struct Entry {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000457 Entry(JsonValue&& jvalue_arg)
458 : jvalue(std::move(jvalue_arg)), has_map_key(false), map_key() {}
Nigel Taocf6c5782020-08-03 23:43:45 +1000459
Nigel Tao5396dbd2020-08-29 22:02:35 +1000460 JsonValue jvalue;
Nigel Taocf6c5782020-08-03 23:43:45 +1000461 bool has_map_key;
462 std::string map_key;
463 };
464
465 Callbacks() = default;
466
Nigel Tao5396dbd2020-08-29 22:02:35 +1000467 std::string Append(JsonValue&& jvalue) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000468 if (m_stack.empty()) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000469 m_stack.push_back(Entry(std::move(jvalue)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000470 return "";
471 }
472 Entry& top = m_stack.back();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000473 if (std::holds_alternative<JsonVector>(top.jvalue)) {
474 std::get<JsonVector>(top.jvalue).push_back(std::move(jvalue));
475 return "";
476 } else if (std::holds_alternative<JsonMap>(top.jvalue)) {
477 JsonMap& jmap = std::get<JsonMap>(top.jvalue);
478 if (top.has_map_key) {
479 top.has_map_key = false;
480 auto iter = jmap.find(top.map_key);
481 if (iter != jmap.end()) {
Nigel Taoe9155432022-12-08 15:06:40 +1100482 return "main: duplicate key: key=" + top.map_key;
Nigel Taocf6c5782020-08-03 23:43:45 +1000483 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000484 jmap.insert(iter, JsonMap::value_type(std::move(top.map_key),
485 std::move(jvalue)));
486 return "";
487 } else if (std::holds_alternative<std::string>(jvalue)) {
488 top.has_map_key = true;
489 top.map_key = std::move(std::get<std::string>(jvalue));
490 return "";
491 }
492 return "main: internal error: non-string map key";
493 } else {
494 return "main: internal error: non-container stack entry";
Nigel Taocf6c5782020-08-03 23:43:45 +1000495 }
Nigel Taocf6c5782020-08-03 23:43:45 +1000496 }
497
Nigel Tao5396dbd2020-08-29 22:02:35 +1000498 std::string AppendNull() override { return Append(JsonValue()); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000499
Nigel Tao5396dbd2020-08-29 22:02:35 +1000500 std::string AppendBool(bool val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000501
Nigel Tao5396dbd2020-08-29 22:02:35 +1000502 std::string AppendI64(int64_t val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000503
Nigel Tao5396dbd2020-08-29 22:02:35 +1000504 std::string AppendF64(double val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000505
Nigel Taoca5da1f2020-08-10 15:26:29 +1000506 std::string AppendTextString(std::string&& val) override {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000507 return Append(JsonValue(std::move(val)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000508 }
509
Nigel Taoca5da1f2020-08-10 15:26:29 +1000510 std::string Push(uint32_t flags) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000511 if (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000512 m_stack.push_back(JsonValue(static_cast<JsonVector*>(nullptr)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000513 return "";
514 } else if (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000515 m_stack.push_back(JsonValue(static_cast<JsonMap*>(nullptr)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000516 return "";
517 }
518 return "main: internal error: bad push";
519 }
520
Nigel Taoca5da1f2020-08-10 15:26:29 +1000521 std::string Pop(uint32_t flags) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000522 if (m_stack.empty()) {
523 return "main: internal error: bad pop";
524 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000525 JsonValue jvalue = std::move(m_stack.back().jvalue);
Nigel Taocf6c5782020-08-03 23:43:45 +1000526 m_stack.pop_back();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000527 return Append(std::move(jvalue));
Nigel Taocf6c5782020-08-03 23:43:45 +1000528 }
529
Nigel Taoca5da1f2020-08-10 15:26:29 +1000530 void Done(wuffs_aux::DecodeJsonResult& result,
531 wuffs_aux::sync_io::Input& input,
532 wuffs_aux::IOBuffer& buffer) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000533 if (!result.error_message.empty()) {
534 return;
535 } else if (m_stack.size() != 1) {
536 result.error_message = "main: internal error: bad depth";
537 return;
Nigel Tao8d779ae2020-08-29 23:10:08 +1000538 } else if (!g_flags.only_parse_dont_output) {
539 result.error_message = print_json_pointers(m_stack.back().jvalue, 0);
Nigel Taocf6c5782020-08-03 23:43:45 +1000540 }
Nigel Taocf6c5782020-08-03 23:43:45 +1000541 }
542
543 private:
544 std::vector<Entry> m_stack;
545};
546
547// ----
548
Nigel Taod0b16cb2020-03-14 10:15:54 +1100549std::string //
550main1(int argc, char** argv) {
551 TRY(parse_flags(argc, argv));
Nigel Taob3438432020-08-13 00:06:56 +1000552 if (!g_flags.strict_json_pointer_syntax) {
Nigel Tao904004e2020-11-15 20:56:04 +1100553 g_quirks.push_back(
554 WUFFS_JSON__QUIRK_JSON_POINTER_ALLOW_TILDE_N_TILDE_R_TILDE_T);
Nigel Taob3438432020-08-13 00:06:56 +1000555 }
Nigel Taod0b16cb2020-03-14 10:15:54 +1100556
Nigel Taocf6c5782020-08-03 23:43:45 +1000557 FILE* in = stdin;
Nigel Taod60815c2020-03-26 14:32:35 +1100558 if (g_flags.remaining_argc > 1) {
559 return g_usage;
560 } else if (g_flags.remaining_argc == 1) {
Nigel Tao816475b2021-07-07 20:28:35 +1000561 in = fopen(g_flags.remaining_argv[0], "rb");
Nigel Taocf6c5782020-08-03 23:43:45 +1000562 if (!in) {
563 return std::string("main: cannot read input file");
Nigel Taod0b16cb2020-03-14 10:15:54 +1100564 }
565 }
566
Nigel Tao2742a4f2020-08-17 00:02:49 +1000567 Callbacks callbacks;
568 wuffs_aux::sync_io::FileInput input(in);
Nigel Taocf6c5782020-08-03 23:43:45 +1000569 return wuffs_aux::DecodeJson(
Nigel Tao2742a4f2020-08-17 00:02:49 +1000570 callbacks, input,
Nigel Taoca335062021-10-18 00:25:13 +1100571 wuffs_aux::DecodeJsonArgQuirks(g_quirks.data(), g_quirks.size()),
572 wuffs_aux::DecodeJsonArgJsonPointer(
573 g_flags.query_c_string ? g_flags.query_c_string : ""))
Nigel Taocf6c5782020-08-03 23:43:45 +1000574 .error_message;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100575}
576
577// ----
578
// compute_exit_code maps a status message to a process exit code, writing any
// non-empty message (plus a new line) to std::cerr.
//
// It returns 0 for an empty message, 2 if the message mentions
// "internal error:" before its first '=' (or anywhere, if it has no '=') and
// 1 otherwise.
int  //
compute_exit_code(std::string status_msg) {
  if (status_msg.empty()) {
    return 0;
  }
  std::cerr << status_msg << '\n';
  // Return an exit code of 1 for regular (foreseen) errors, e.g. badly
  // formatted or unsupported input.
  //
  // Return an exit code of 2 for internal (exceptional) errors, e.g. defensive
  // run-time checks found that an internal invariant did not hold.
  //
  // Automated testing, including badly formatted inputs, can therefore
  // discriminate between expected failure (exit code 1) and unexpected failure
  // (other non-zero exit codes). Specifically, exit code 2 for internal
  // invariant violation, exit code 139 (which is 128 + SIGSEGV on x86_64
  // linux) for a segmentation fault (e.g. null pointer dereference).

  // Only the text before the first '=' is classified. What follows it can be
  // copied from the input (e.g. a duplicate key) and must not be able to turn
  // a regular error into an internal one.
  const size_t equals_pos = status_msg.find('=');
  if (equals_pos != std::string::npos) {
    status_msg.erase(equals_pos);
  }
  if (status_msg.find("internal error:") != std::string::npos) {
    return 2;
  }
  return 1;
}
602
603int //
604main(int argc, char** argv) {
605 std::string z = main1(argc, argv);
606 int exit_code = compute_exit_code(z);
607 return exit_code;
608}