blob: bc766a935b4124a720e592dbebb5c40a05ec4320 [file] [log] [blame]
Nigel Taod0b16cb2020-03-14 10:15:54 +11001// Copyright 2020 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// ----------------
16
17/*
18jsonfindptrs reads UTF-8 JSON from stdin and writes every node's JSON Pointer
19(RFC 6901) to stdout.
20
Nigel Taod60815c2020-03-26 14:32:35 +110021See the "const char* g_usage" string below for details.
Nigel Taod0b16cb2020-03-14 10:15:54 +110022
23----
24
25This program uses Wuffs' JSON decoder at a relatively high level, building
26in-memory representations of JSON 'things' (e.g. numbers, strings, objects).
27After the entire input has been converted, walking the tree prints the output
Nigel Taocf6c5782020-08-03 23:43:45 +100028(in sorted order). The wuffs_aux::DecodeJson library function converts the
29lower level token stream to higher level callbacks. This .cc file deals only
30with those callbacks, not with tokens per se.
Nigel Taod0b16cb2020-03-14 10:15:54 +110031
32This approach is centered around JSON things. Each JSON thing comprises one or
33more JSON tokens.
34
35An alternative, lower-level approach is in the sibling example/jsonptr program.
36Neither approach is better or worse per se, but when studying this program, be
37aware that there are multiple ways to use Wuffs' JSON decoder.
38
39The two programs, jsonfindptrs and jsonptr, also demonstrate different
40trade-offs with regard to JSON object duplicate keys. The JSON spec permits
41different implementations to allow or reject duplicate keys. It is not always
42clear which approach is safer. Rejecting them is certainly unambiguous, and
43security bugs can lurk in ambiguous corners of a file format, if two different
44implementations both silently accept a file but differ on how to interpret it.
45On the other hand, in the worst case, detecting duplicate keys requires O(N)
46memory, where N is the size of the (potentially untrusted) input.
47
48This program (jsonfindptrs) rejects duplicate keys.
49
50----
51
Nigel Tao50bfab92020-08-05 11:39:09 +100052To run:
Nigel Taod0b16cb2020-03-14 10:15:54 +110053
54$CXX jsonfindptrs.cc && ./a.out < ../../test/data/github-tags.json; rm -f a.out
55
56for a C++ compiler $CXX, such as clang++ or g++.
57*/
58
Nigel Tao5396dbd2020-08-29 22:02:35 +100059#if defined(__cplusplus) && (__cplusplus < 201703L)
60#error "This C++ program requires -std=c++17 or later"
Nigel Tao721190a2020-04-03 22:25:21 +110061#endif
62
Nigel Taocf6c5782020-08-03 23:43:45 +100063#include <stdio.h>
Nigel Tao6b7ce302020-07-07 16:19:46 +100064
Nigel Taod0b16cb2020-03-14 10:15:54 +110065#include <iostream>
66#include <map>
67#include <string>
68#include <vector>
69
Nigel Tao5396dbd2020-08-29 22:02:35 +100070// <variant> requires C++17.
71#include <variant>
72
Nigel Taod0b16cb2020-03-14 10:15:54 +110073// Wuffs ships as a "single file C library" or "header file library" as per
74// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
75//
76// To use that single file as a "foo.c"-like implementation, instead of a
77// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
78// compiling it.
79#define WUFFS_IMPLEMENTATION
80
// Defining the WUFFS_CONFIG__MODULE* macros is optional, but it lets users of
82// release/c/etc.c whitelist which parts of Wuffs to build. That file contains
83// the entire Wuffs standard library, implementing a variety of codecs and file
84// formats. Without this macro definition, an optimizing compiler or linker may
85// very well discard Wuffs code for unused codecs, but listing the Wuffs
86// modules we use makes that process explicit. Preprocessing means that such
87// code simply isn't compiled.
88#define WUFFS_CONFIG__MODULES
Nigel Taocf6c5782020-08-03 23:43:45 +100089#define WUFFS_CONFIG__MODULE__AUX__BASE
90#define WUFFS_CONFIG__MODULE__AUX__JSON
Nigel Taod0b16cb2020-03-14 10:15:54 +110091#define WUFFS_CONFIG__MODULE__BASE
92#define WUFFS_CONFIG__MODULE__JSON
93
94// If building this program in an environment that doesn't easily accommodate
95// relative includes, you can use the script/inline-c-relative-includes.go
96// program to generate a stand-alone C++ file.
97#include "../../release/c/wuffs-unsupported-snapshot.c"
98
// TRY evaluates error_msg (an expression convertible to std::string) exactly
// once. A non-empty result is treated as an error message and is returned
// from the enclosing function, which must therefore return something that a
// std::string converts to. An empty result means success and execution
// continues after the macro.
//
// The temporary has a deliberately unusual name so that it cannot shadow a
// variable mentioned inside error_msg. With a short name such as "z",
// TRY(f(z)) would expand to the self-referential "std::string z = f(z);".
#define TRY(error_msg)                              \
  do {                                              \
    std::string try_macro_error_msg_ = (error_msg); \
    if (!try_macro_error_msg_.empty()) {            \
      return try_macro_error_msg_;                  \
    }                                               \
  } while (false)
106
// g_usage is the help text. parse_flags returns it as the error message for
// an unrecognized or malformed flag, and main1 returns it when more than one
// input filename is given.
static const char* g_usage =
    // Synopsis and flag list.
    "Usage: jsonfindptrs -flags input.json\n"
    "\n"
    "Flags:\n"
    " -d=NUM -max-output-depth=NUM\n"
    " -q=STR -query=STR\n"
    " -input-allow-comments\n"
    " -input-allow-extra-comma\n"
    " -input-allow-inf-nan-numbers\n"
    " -only-parse-dont-output\n"
    " -strict-json-pointer-syntax\n"
    "\n"
    "The input.json filename is optional. If absent, it reads from stdin.\n"
    "\n"
    "----\n"
    "\n"
    // Overview and worked example.
    "jsonfindptrs reads UTF-8 JSON from stdin and writes every node's JSON\n"
    "Pointer (RFC 6901) to stdout.\n"
    "\n"
    "For example, given RFC 6901 section 5's sample input\n"
    "(https://tools.ietf.org/rfc/rfc6901.txt), this command:\n"
    " jsonfindptrs rfc-6901-json-pointer.json\n"
    "will print:\n"
    " \n"
    " /\n"
    " / \n"
    " /a~1b\n"
    " /c%d\n"
    " /e^f\n"
    " /foo\n"
    " /foo/0\n"
    " /foo/1\n"
    " /g|h\n"
    " /i\\j\n"
    " /k\"l\n"
    " /m~0n\n"
    "\n"
    "The first three lines are (1) a 0-byte \"\", (2) a 1-byte \"/\" and (3)\n"
    "a 2-byte \"/ \". Unlike a file system, the \"/\" JSON Pointer does not\n"
    "identify the root. Instead, \"\" is the root and \"/\" is the child (the\n"
    "value in a key-value pair) of the root whose key is the empty string.\n"
    "Similarly, \"/xyz\" and \"/xyz/\" are two different nodes.\n"
    "\n"
    "----\n"
    "\n"
    // Duplicate keys, ordering, integer range and the input-allow-* flags.
    "The JSON specification (https://json.org/) permits implementations that\n"
    "allow duplicate keys, but this one does not. Conversely, it prints keys\n"
    "in sorted order, but the overall output is not necessarily sorted\n"
    "lexicographically. For example, \"/a/9\" would come before \"/a/10\",\n"
    "and \"/b/c\", a child of \"/b\", would come before \"/b+\".\n"
    "\n"
    "This JSON implementation also rejects integer values outside ±M, where\n"
    "M is ((1<<53)-1), also known as JavaScript's Number.MAX_SAFE_INTEGER.\n"
    "\n"
    "The -input-allow-comments flag allows \"/*slash-star*/\" and\n"
    "\"//slash-slash\" C-style comments within JSON input.\n"
    "\n"
    "The -input-allow-extra-comma flag allows input like \"[1,2,]\", with a\n"
    "comma after the final element of a JSON list or dictionary.\n"
    "\n"
    "The -input-allow-inf-nan-numbers flag allows non-finite floating point\n"
    "numbers (infinities and not-a-numbers) within JSON input.\n"
    "\n"
    "----\n"
    "\n"
    // The -only-parse-dont-output flag.
    "The -only-parse-dont-output flag means to write nothing to stdout. An\n"
    "error message will still be written to stderr if the input is invalid.\n"
    "\n"
    "----\n"
    "\n"
    // The -strict-json-pointer-syntax flag.
    "The -strict-json-pointer-syntax flag restricts the output lines to\n"
    "exactly RFC 6901, with only two escape sequences: \"~0\" and \"~1\" for\n"
    "\"~\" and \"/\". Without this flag, this program also lets \"~n\",\n"
    "\"~r\" and \"~t\" escape the New Line, Carriage Return and Horizontal\n"
    "Tab ASCII control characters, which can work better with line oriented\n"
    "(and tab separated) Unix tools that assume exactly one record (e.g. one\n"
    "JSON Pointer string) per line. With this flag, it fails if the input\n"
    "JSON's keys contain \"\\u0009\", \"\\u000A\" or \"\\u000D\".\n"
    "\n"
    "----\n"
    "\n"
    // Input depth limit and the -max-output-depth flag.
    "The JSON specification permits implementations to set their own maximum\n"
    "input depth. This JSON implementation sets it to 1024.\n"
    "\n"
    "The -d=NUM or -max-output-depth=NUM flag gives the maximum (inclusive)\n"
    "output depth. JSON containers ([] arrays and {} objects) can hold other\n"
    "containers. A bare -d or -max-output-depth is equivalent to -d=1,\n"
    "analogous to the Unix ls command. The flag's absence is equivalent to an\n"
    "unlimited output depth, analogous to the Unix find command (and hence\n"
    "the name of this program: jsonfindptrs).";
197
198// ----
199
// g_quirks collects the Wuffs JSON decoder quirks selected on the command
// line. main1 passes it to wuffs_aux::DecodeJson.
std::vector<uint32_t> g_quirks = {};

// g_dst holds the JSON Pointer of the node currently being printed.
// print_json_pointers extends and truncates it in place while walking the
// tree.
std::string g_dst = {};

// g_to_string_cache[i] memoizes std::to_string(i), so that array indexes are
// formatted at most once each.
std::vector<std::string> g_to_string_cache = {};
206
// g_flags holds the parsed command line. Every field starts out zeroed;
// parse_flags fills them in.
struct {
  // The arguments left over after the flags (at most one input filename).
  int remaining_argc = 0;
  char** remaining_argv = nullptr;

  // Boolean flags, false unless given on the command line.
  bool only_parse_dont_output = false;
  bool strict_json_pointer_syntax = false;

  // Maximum (inclusive) depth to print. parse_flags raises this to
  // 0xFFFFFFFF (unlimited) before examining the arguments.
  uint32_t max_output_depth = 0;

  // The -q=STR / -query=STR value, pointing into argv, or null if absent.
  char* query_c_string = nullptr;
} g_flags;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100218
219std::string //
220parse_flags(int argc, char** argv) {
Nigel Taod60815c2020-03-26 14:32:35 +1100221 g_flags.max_output_depth = 0xFFFFFFFF;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100222
223 int c = (argc > 0) ? 1 : 0; // Skip argv[0], the program name.
224 for (; c < argc; c++) {
225 char* arg = argv[c];
226 if (*arg++ != '-') {
227 break;
228 }
229
230 // A double-dash "--foo" is equivalent to a single-dash "-foo". As special
231 // cases, a bare "-" is not a flag (some programs may interpret it as
232 // stdin) and a bare "--" means to stop parsing flags.
233 if (*arg == '\x00') {
234 break;
235 } else if (*arg == '-') {
236 arg++;
237 if (*arg == '\x00') {
238 c++;
239 break;
240 }
241 }
242
Nigel Tao94440cf2020-04-02 22:28:24 +1100243 if (!strcmp(arg, "d") || !strcmp(arg, "max-output-depth")) {
Nigel Taod60815c2020-03-26 14:32:35 +1100244 g_flags.max_output_depth = 1;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100245 continue;
Nigel Tao94440cf2020-04-02 22:28:24 +1100246 } else if (!strncmp(arg, "d=", 2) ||
Nigel Taod0b16cb2020-03-14 10:15:54 +1100247 !strncmp(arg, "max-output-depth=", 16)) {
248 while (*arg++ != '=') {
249 }
250 wuffs_base__result_u64 u = wuffs_base__parse_number_u64(
Nigel Tao6b7ce302020-07-07 16:19:46 +1000251 wuffs_base__make_slice_u8((uint8_t*)arg, strlen(arg)),
252 WUFFS_BASE__PARSE_NUMBER_XXX__DEFAULT_OPTIONS);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100253 if (wuffs_base__status__is_ok(&u.status) && (u.value <= 0xFFFFFFFF)) {
Nigel Taod60815c2020-03-26 14:32:35 +1100254 g_flags.max_output_depth = (uint32_t)(u.value);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100255 continue;
256 }
Nigel Taod60815c2020-03-26 14:32:35 +1100257 return g_usage;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100258 }
Nigel Tao0a5940d2020-08-07 13:15:41 +1000259 if (!strcmp(arg, "input-allow-comments")) {
260 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
261 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
262 continue;
263 }
264 if (!strcmp(arg, "input-allow-extra-comma")) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000265 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
Nigel Taoc766bb72020-07-09 12:59:32 +1000266 continue;
267 }
Nigel Tao0a5940d2020-08-07 13:15:41 +1000268 if (!strcmp(arg, "input-allow-inf-nan-numbers")) {
269 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS);
270 continue;
271 }
Nigel Tao8aac6762020-08-12 22:47:45 +1000272 if (!strncmp(arg, "q=", 2) || !strncmp(arg, "query=", 6)) {
273 while (*arg++ != '=') {
274 }
275 g_flags.query_c_string = arg;
276 continue;
277 }
Nigel Tao8d779ae2020-08-29 23:10:08 +1000278 if (!strcmp(arg, "only-parse-dont-output")) {
279 g_flags.only_parse_dont_output = true;
280 continue;
281 }
Nigel Taoecadf722020-07-13 08:22:34 +1000282 if (!strcmp(arg, "strict-json-pointer-syntax")) {
Nigel Taod60815c2020-03-26 14:32:35 +1100283 g_flags.strict_json_pointer_syntax = true;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100284 continue;
285 }
286
Nigel Taod60815c2020-03-26 14:32:35 +1100287 return g_usage;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100288 }
289
Nigel Taod60815c2020-03-26 14:32:35 +1100290 g_flags.remaining_argc = argc - c;
291 g_flags.remaining_argv = argv + c;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100292 return "";
293}
294
Nigel Tao6b7ce302020-07-07 16:19:46 +1000295// ----
Nigel Taod0b16cb2020-03-14 10:15:54 +1100296
struct JsonValue;

// A JSON array: an ordered sequence of values.
using JsonVector = std::vector<JsonValue>;

// A JSON object: values indexed by key, iterated in sorted key order.
//
// We use a std::map in this example program to avoid dependencies outside of
// the C++ standard library. If you're copy/pasting this JsonValue code,
// consider a more efficient data structure such as an absl::btree_map.
//
// See CppCon 2014: Chandler Carruth "Efficiency with Algorithms, Performance
// with Data Structures" at https://www.youtube.com/watch?v=fHNmRkzxHWs
using JsonMap = std::map<std::string, JsonValue>;

// The alternatives a JsonValue can hold. std::monostate represents JSON null.
using JsonVariant = std::variant<std::monostate,
                                 bool,
                                 int64_t,
                                 double,
                                 std::string,
                                 JsonVector,
                                 JsonMap>;

// JsonValue is one node of the in-memory JSON tree. It is a named struct
// (instead of a plain alias of JsonVariant) so that JsonVector and JsonMap can
// refer to it before JsonVariant is spelled out.
struct JsonValue : JsonVariant {
  JsonValue() : JsonVariant() {}
  JsonValue(bool x) : JsonVariant(x) {}
  JsonValue(int64_t x) : JsonVariant(x) {}
  JsonValue(double x) : JsonVariant(x) {}

  // x is a named rvalue reference and hence an lvalue here. Without the
  // std::move, the variant would copy the string instead of taking it over.
  JsonValue(std::string&& x) : JsonVariant(std::move(x)) {}

  // The pointer arguments are never dereferenced. Their static type merely
  // selects which kind of empty container to construct.
  JsonValue(JsonVector*) : JsonVariant(JsonVector()) {}
  JsonValue(JsonMap*) : JsonVariant(JsonMap()) {}
};
326
Nigel Taod0b16cb2020-03-14 10:15:54 +1100327// ----
328
// escape_needed reports whether s contains any byte that escape would rewrite:
// '~', '/', New Line, Carriage Return or Horizontal Tab.
bool  //
escape_needed(const std::string& s) {
  return s.find_first_of("~/\n\r\t") != std::string::npos;
}
Nigel Taod0b16cb2020-03-14 10:15:54 +1100338
Nigel Tao9d699392020-08-29 10:56:37 +1000339std::string //
340escape(const std::string& s) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100341 std::string e;
342 e.reserve(8 + s.length());
Nigel Tao9d699392020-08-29 10:56:37 +1000343 for (const char& c : s) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100344 switch (c) {
345 case '~':
346 e += "~0";
347 break;
348 case '/':
349 e += "~1";
350 break;
351 case '\n':
Nigel Taod60815c2020-03-26 14:32:35 +1100352 if (g_flags.strict_json_pointer_syntax) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100353 return "";
354 }
355 e += "~n";
356 break;
357 case '\r':
Nigel Taod60815c2020-03-26 14:32:35 +1100358 if (g_flags.strict_json_pointer_syntax) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100359 return "";
360 }
361 e += "~r";
362 break;
Nigel Tao904004e2020-11-15 20:56:04 +1100363 case '\t':
364 if (g_flags.strict_json_pointer_syntax) {
365 return "";
366 }
367 e += "~t";
368 break;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100369 default:
370 e += c;
371 break;
372 }
373 }
374 return e;
375}
376
377std::string //
Nigel Tao5396dbd2020-08-29 22:02:35 +1000378print_json_pointers(JsonValue& jvalue, uint32_t depth) {
Nigel Tao8098d962020-08-29 10:41:05 +1000379 std::cout << g_dst << '\n';
Nigel Taod60815c2020-03-26 14:32:35 +1100380 if (depth++ >= g_flags.max_output_depth) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100381 return "";
382 }
383
Nigel Tao8098d962020-08-29 10:41:05 +1000384 size_t n = g_dst.size();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000385 if (std::holds_alternative<JsonVector>(jvalue)) {
386 JsonVector& jvector = std::get<JsonVector>(jvalue);
387 g_dst += "/";
388 for (size_t i = 0; i < jvector.size(); i++) {
389 if (i >= g_to_string_cache.size()) {
390 g_to_string_cache.push_back(std::to_string(i));
Nigel Taod0b16cb2020-03-14 10:15:54 +1100391 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000392 g_dst += g_to_string_cache[i];
393 TRY(print_json_pointers(jvector[i], depth));
394 g_dst.resize(n + 1);
395 }
396 g_dst.resize(n);
397 } else if (std::holds_alternative<JsonMap>(jvalue)) {
398 g_dst += "/";
399 for (auto& kv : std::get<JsonMap>(jvalue)) {
400 if (!escape_needed(kv.first)) {
401 g_dst += kv.first;
402 } else {
403 std::string e = escape(kv.first);
404 if (e.empty()) {
Nigel Tao904004e2020-11-15 20:56:04 +1100405 return "main: unsupported \"\\u0009\", \"\\u000A\" or \"\\u000D\" in "
406 "object key";
Nigel Taod0b16cb2020-03-14 10:15:54 +1100407 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000408 g_dst += e;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100409 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000410 TRY(print_json_pointers(kv.second, depth));
411 g_dst.resize(n + 1);
412 }
413 g_dst.resize(n);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100414 }
415 return "";
416}
417
Nigel Taocf6c5782020-08-03 23:43:45 +1000418// ----
419
420class Callbacks : public wuffs_aux::DecodeJsonCallbacks {
421 public:
422 struct Entry {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000423 Entry(JsonValue&& jvalue_arg)
424 : jvalue(std::move(jvalue_arg)), has_map_key(false), map_key() {}
Nigel Taocf6c5782020-08-03 23:43:45 +1000425
Nigel Tao5396dbd2020-08-29 22:02:35 +1000426 JsonValue jvalue;
Nigel Taocf6c5782020-08-03 23:43:45 +1000427 bool has_map_key;
428 std::string map_key;
429 };
430
431 Callbacks() = default;
432
Nigel Tao5396dbd2020-08-29 22:02:35 +1000433 std::string Append(JsonValue&& jvalue) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000434 if (m_stack.empty()) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000435 m_stack.push_back(Entry(std::move(jvalue)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000436 return "";
437 }
438 Entry& top = m_stack.back();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000439 if (std::holds_alternative<JsonVector>(top.jvalue)) {
440 std::get<JsonVector>(top.jvalue).push_back(std::move(jvalue));
441 return "";
442 } else if (std::holds_alternative<JsonMap>(top.jvalue)) {
443 JsonMap& jmap = std::get<JsonMap>(top.jvalue);
444 if (top.has_map_key) {
445 top.has_map_key = false;
446 auto iter = jmap.find(top.map_key);
447 if (iter != jmap.end()) {
448 return "main: duplicate key: " + top.map_key;
Nigel Taocf6c5782020-08-03 23:43:45 +1000449 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000450 jmap.insert(iter, JsonMap::value_type(std::move(top.map_key),
451 std::move(jvalue)));
452 return "";
453 } else if (std::holds_alternative<std::string>(jvalue)) {
454 top.has_map_key = true;
455 top.map_key = std::move(std::get<std::string>(jvalue));
456 return "";
457 }
458 return "main: internal error: non-string map key";
459 } else {
460 return "main: internal error: non-container stack entry";
Nigel Taocf6c5782020-08-03 23:43:45 +1000461 }
Nigel Taocf6c5782020-08-03 23:43:45 +1000462 }
463
Nigel Tao5396dbd2020-08-29 22:02:35 +1000464 std::string AppendNull() override { return Append(JsonValue()); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000465
Nigel Tao5396dbd2020-08-29 22:02:35 +1000466 std::string AppendBool(bool val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000467
Nigel Tao5396dbd2020-08-29 22:02:35 +1000468 std::string AppendI64(int64_t val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000469
Nigel Tao5396dbd2020-08-29 22:02:35 +1000470 std::string AppendF64(double val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000471
Nigel Taoca5da1f2020-08-10 15:26:29 +1000472 std::string AppendTextString(std::string&& val) override {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000473 return Append(JsonValue(std::move(val)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000474 }
475
Nigel Taoca5da1f2020-08-10 15:26:29 +1000476 std::string Push(uint32_t flags) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000477 if (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000478 m_stack.push_back(JsonValue(static_cast<JsonVector*>(nullptr)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000479 return "";
480 } else if (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000481 m_stack.push_back(JsonValue(static_cast<JsonMap*>(nullptr)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000482 return "";
483 }
484 return "main: internal error: bad push";
485 }
486
Nigel Taoca5da1f2020-08-10 15:26:29 +1000487 std::string Pop(uint32_t flags) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000488 if (m_stack.empty()) {
489 return "main: internal error: bad pop";
490 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000491 JsonValue jvalue = std::move(m_stack.back().jvalue);
Nigel Taocf6c5782020-08-03 23:43:45 +1000492 m_stack.pop_back();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000493 return Append(std::move(jvalue));
Nigel Taocf6c5782020-08-03 23:43:45 +1000494 }
495
Nigel Taoca5da1f2020-08-10 15:26:29 +1000496 void Done(wuffs_aux::DecodeJsonResult& result,
497 wuffs_aux::sync_io::Input& input,
498 wuffs_aux::IOBuffer& buffer) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000499 if (!result.error_message.empty()) {
500 return;
501 } else if (m_stack.size() != 1) {
502 result.error_message = "main: internal error: bad depth";
503 return;
Nigel Tao8d779ae2020-08-29 23:10:08 +1000504 } else if (!g_flags.only_parse_dont_output) {
505 result.error_message = print_json_pointers(m_stack.back().jvalue, 0);
Nigel Taocf6c5782020-08-03 23:43:45 +1000506 }
Nigel Taocf6c5782020-08-03 23:43:45 +1000507 }
508
509 private:
510 std::vector<Entry> m_stack;
511};
512
513// ----
514
Nigel Taod0b16cb2020-03-14 10:15:54 +1100515std::string //
516main1(int argc, char** argv) {
517 TRY(parse_flags(argc, argv));
Nigel Taob3438432020-08-13 00:06:56 +1000518 if (!g_flags.strict_json_pointer_syntax) {
Nigel Tao904004e2020-11-15 20:56:04 +1100519 g_quirks.push_back(
520 WUFFS_JSON__QUIRK_JSON_POINTER_ALLOW_TILDE_N_TILDE_R_TILDE_T);
Nigel Taob3438432020-08-13 00:06:56 +1000521 }
Nigel Taod0b16cb2020-03-14 10:15:54 +1100522
Nigel Taocf6c5782020-08-03 23:43:45 +1000523 FILE* in = stdin;
Nigel Taod60815c2020-03-26 14:32:35 +1100524 if (g_flags.remaining_argc > 1) {
525 return g_usage;
526 } else if (g_flags.remaining_argc == 1) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000527 in = fopen(g_flags.remaining_argv[0], "r");
528 if (!in) {
529 return std::string("main: cannot read input file");
Nigel Taod0b16cb2020-03-14 10:15:54 +1100530 }
531 }
532
Nigel Tao2742a4f2020-08-17 00:02:49 +1000533 Callbacks callbacks;
534 wuffs_aux::sync_io::FileInput input(in);
Nigel Taocf6c5782020-08-03 23:43:45 +1000535 return wuffs_aux::DecodeJson(
Nigel Tao2742a4f2020-08-17 00:02:49 +1000536 callbacks, input,
Nigel Tao8aac6762020-08-12 22:47:45 +1000537 wuffs_base__make_slice_u32(g_quirks.data(), g_quirks.size()),
538 (g_flags.query_c_string ? g_flags.query_c_string : ""))
Nigel Taocf6c5782020-08-03 23:43:45 +1000539 .error_message;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100540}
541
542// ----
543
// compute_exit_code maps main1's result to a process exit code, writing any
// error message (plus a trailing new line) to stderr.
//
//  - 0 means success (an empty status_msg).
//  - 1 means a regular (foreseen) error, e.g. badly formatted or unsupported
//    input.
//  - 2 means an internal (exceptional) error, e.g. defensive run-time checks
//    found that an internal invariant did not hold. Such messages contain
//    "internal error:".
//
// Automated testing, including badly formatted inputs, can therefore
// discriminate between expected failure (exit code 1) and unexpected failure
// (other non-zero exit codes). Specifically, exit code 2 for internal
// invariant violation, exit code 139 (which is 128 + SIGSEGV on x86_64 linux)
// for a segmentation fault (e.g. null pointer dereference).
int  //
compute_exit_code(std::string status_msg) {
  if (status_msg.empty()) {
    return 0;
  }
  std::cerr << status_msg << '\n';

  const bool is_internal =
      status_msg.find("internal error:") != std::string::npos;
  if (is_internal) {
    return 2;
  }
  return 1;
}
563
564int //
565main(int argc, char** argv) {
566 std::string z = main1(argc, argv);
567 int exit_code = compute_exit_code(z);
568 return exit_code;
569}