blob: 56e02cfcfadbf540c7278adee12909b1f599ea9d [file] [log] [blame]
// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// ----------------

/*
jsonfindptrs reads UTF-8 JSON from stdin and writes every node's JSON Pointer
(RFC 6901) to stdout.

See the "const char* g_usage" string below for details.

----

This program uses Wuffs' JSON decoder at a relatively high level, building
in-memory representations of JSON 'things' (e.g. numbers, strings, objects).
After the entire input has been converted, walking the tree prints the output
(in sorted order). The wuffs_aux::DecodeJson library function converts the
lower level token stream to higher level callbacks. This .cc file deals only
with those callbacks, not with tokens per se.

This approach is centered around JSON things. Each JSON thing comprises one or
more JSON tokens.

An alternative, lower-level approach is in the sibling example/jsonptr program.
Neither approach is better or worse per se, but when studying this program, be
aware that there are multiple ways to use Wuffs' JSON decoder.

The two programs, jsonfindptrs and jsonptr, also demonstrate different
trade-offs with regard to JSON object duplicate keys. The JSON spec permits
different implementations to allow or reject duplicate keys. It is not always
clear which approach is safer. Rejecting them is certainly unambiguous, and
security bugs can lurk in ambiguous corners of a file format, if two different
implementations both silently accept a file but differ on how to interpret it.
On the other hand, in the worst case, detecting duplicate keys requires O(N)
memory, where N is the size of the (potentially untrusted) input.

This program (jsonfindptrs) rejects duplicate keys.

----

To run:

$CXX jsonfindptrs.cc && ./a.out < ../../test/data/github-tags.json; rm -f a.out

for a C++ compiler $CXX, such as clang++ or g++.
*/
58
Nigel Tao5396dbd2020-08-29 22:02:35 +100059#if defined(__cplusplus) && (__cplusplus < 201703L)
60#error "This C++ program requires -std=c++17 or later"
Nigel Tao721190a2020-04-03 22:25:21 +110061#endif
62
Nigel Taocf6c5782020-08-03 23:43:45 +100063#include <stdio.h>
Nigel Tao6b7ce302020-07-07 16:19:46 +100064
Nigel Taod0b16cb2020-03-14 10:15:54 +110065#include <iostream>
66#include <map>
67#include <string>
68#include <vector>
69
Nigel Tao5396dbd2020-08-29 22:02:35 +100070// <variant> requires C++17.
71#include <variant>
72
Nigel Taod0b16cb2020-03-14 10:15:54 +110073// Wuffs ships as a "single file C library" or "header file library" as per
74// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
75//
76// To use that single file as a "foo.c"-like implementation, instead of a
77// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
78// compiling it.
79#define WUFFS_IMPLEMENTATION
80
81// Defining the WUFFS_CONFIG__MODULE* macros are optional, but it lets users of
Nigel Tao2f788042021-01-23 19:29:19 +110082// release/c/etc.c choose which parts of Wuffs to build. That file contains the
83// entire Wuffs standard library, implementing a variety of codecs and file
Nigel Taod0b16cb2020-03-14 10:15:54 +110084// formats. Without this macro definition, an optimizing compiler or linker may
85// very well discard Wuffs code for unused codecs, but listing the Wuffs
86// modules we use makes that process explicit. Preprocessing means that such
87// code simply isn't compiled.
88#define WUFFS_CONFIG__MODULES
Nigel Taocf6c5782020-08-03 23:43:45 +100089#define WUFFS_CONFIG__MODULE__AUX__BASE
90#define WUFFS_CONFIG__MODULE__AUX__JSON
Nigel Taod0b16cb2020-03-14 10:15:54 +110091#define WUFFS_CONFIG__MODULE__BASE
92#define WUFFS_CONFIG__MODULE__JSON
93
94// If building this program in an environment that doesn't easily accommodate
95// relative includes, you can use the script/inline-c-relative-includes.go
96// program to generate a stand-alone C++ file.
97#include "../../release/c/wuffs-unsupported-snapshot.c"
98
// TRY evaluates error_msg (an expression convertible to std::string) exactly
// once. If the resulting message is non-empty then the enclosing function
// returns it, otherwise execution carries on with the next statement. The
// do/while wrapper makes the expansion behave as a single statement.
#define TRY(error_msg)                    \
  do {                                    \
    std::string try_error_ = (error_msg); \
    if (try_error_.size() > 0) {          \
      return try_error_;                  \
    }                                     \
  } while (false)
106
// g_usage is the help text. parse_flags and main1 return it as their error
// message when the command line is malformed (an unrecognized flag, an
// unparsable -max-output-depth number or more than one input filename).
static const char* g_usage =
    "Usage: jsonfindptrs -flags input.json\n"
    "\n"
    "Flags:\n"
    " -d=NUM -max-output-depth=NUM\n"
    " -q=STR -query=STR\n"
    " -input-allow-comments\n"
    " -input-allow-extra-comma\n"
    " -input-allow-inf-nan-numbers\n"
    " -input-jwcc\n"
    " -jwcc\n"
    " -only-parse-dont-output\n"
    " -strict-json-pointer-syntax\n"
    "\n"
    "The input.json filename is optional. If absent, it reads from stdin.\n"
    "\n"
    "----\n"
    "\n"
    "jsonfindptrs reads UTF-8 JSON from stdin and writes every node's JSON\n"
    "Pointer (RFC 6901) to stdout.\n"
    "\n"
    "For example, given RFC 6901 section 5's sample input\n"
    "(https://tools.ietf.org/rfc/rfc6901.txt), this command:\n"
    " jsonfindptrs rfc-6901-json-pointer.json\n"
    "will print:\n"
    " \n"
    " /\n"
    " / \n"
    " /a~1b\n"
    " /c%d\n"
    " /e^f\n"
    " /foo\n"
    " /foo/0\n"
    " /foo/1\n"
    " /g|h\n"
    " /i\\j\n"
    " /k\"l\n"
    " /m~0n\n"
    "\n"
    "The first three lines are (1) a 0-byte \"\", (2) a 1-byte \"/\" and (3)\n"
    "a 2-byte \"/ \". Unlike a file system, the \"/\" JSON Pointer does not\n"
    "identify the root. Instead, \"\" is the root and \"/\" is the child (the\n"
    "value in a key-value pair) of the root whose key is the empty string.\n"
    "Similarly, \"/xyz\" and \"/xyz/\" are two different nodes.\n"
    "\n"
    "----\n"
    "\n"
    "The JSON specification (https://json.org/) permits implementations that\n"
    "allow duplicate keys, but this one does not. Conversely, it prints keys\n"
    "in sorted order, but the overall output is not necessarily sorted\n"
    "lexicographically. For example, \"/a/9\" would come before \"/a/10\",\n"
    "and \"/b/c\", a child of \"/b\", would come before \"/b+\".\n"
    "\n"
    "This JSON implementation also rejects integer values outside ±M, where\n"
    "M is ((1<<53)-1), also known as JavaScript's Number.MAX_SAFE_INTEGER.\n"
    "\n"
    "The -input-allow-comments flag allows \"/*slash-star*/\" and\n"
    "\"//slash-slash\" C-style comments within JSON input.\n"
    "\n"
    "The -input-allow-extra-comma flag allows input like \"[1,2,]\", with a\n"
    "comma after the final element of a JSON list or dictionary.\n"
    "\n"
    "The -input-allow-inf-nan-numbers flag allows non-finite floating point\n"
    "numbers (infinities and not-a-numbers) within JSON input.\n"
    "\n"
    "Combining some of those flags results in speaking JWCC (JSON With Commas\n"
    "and Comments), not plain JSON. For convenience, the -input-jwcc or -jwcc\n"
    "flags enables all of:\n"
    " -input-allow-comments\n"
    " -input-allow-extra-comma\n"
    "\n"
#if defined(WUFFS_EXAMPLE_SPEAK_JWCC_NOT_JSON)
    "This program was configured at compile time to always use -jwcc.\n"
    "\n"
#endif
    "----\n"
    "\n"
    "The -only-parse-dont-output flag means to write nothing to stdout. An\n"
    "error message will still be written to stderr if the input is invalid.\n"
    "\n"
    "----\n"
    "\n"
    "The -strict-json-pointer-syntax flag restricts the output lines to\n"
    "exactly RFC 6901, with only two escape sequences: \"~0\" and \"~1\" for\n"
    "\"~\" and \"/\". Without this flag, this program also lets \"~n\",\n"
    "\"~r\" and \"~t\" escape the New Line, Carriage Return and Horizontal\n"
    "Tab ASCII control characters, which can work better with line oriented\n"
    "(and tab separated) Unix tools that assume exactly one record (e.g. one\n"
    "JSON Pointer string) per line. With this flag, it fails if the input\n"
    "JSON's keys contain \"\\u0009\", \"\\u000A\" or \"\\u000D\".\n"
    "\n"
    "----\n"
    "\n"
    "The JSON specification permits implementations to set their own maximum\n"
    "input depth. This JSON implementation sets it to 1024.\n"
    "\n"
    "The -d=NUM or -max-output-depth=NUM flag gives the maximum (inclusive)\n"
    "output depth. JSON containers ([] arrays and {} objects) can hold other\n"
    "containers. A bare -d or -max-output-depth is equivalent to -d=1,\n"
    "analogous to the Unix ls command. The flag's absence is equivalent to an\n"
    "unlimited output depth, analogous to the Unix find command (and hence\n"
    "the name of this program: jsonfindptrs).";
209
210// ----
211
// g_quirks lists the Wuffs JSON decoder quirks to enable. parse_flags and
// main1 append to it and main1 hands it to wuffs_aux::DecodeJson.
std::vector<uint32_t> g_quirks;

// g_dst holds the JSON Pointer text of the node that print_json_pointers is
// currently visiting. That function appends to it when descending into a
// container and shrinks it again afterwards.
std::string g_dst;

// g_to_string_cache[i] caches the result of std::to_string(i).
std::vector<std::string> g_to_string_cache;

// g_flags holds the parsed command line. It starts out zero-initialized and
// parse_flags fills it in.
struct {
  // The arguments that remain after the flags: at most one input filename.
  int remaining_argc;
  char** remaining_argv;

  // Set by the -only-parse-dont-output and -strict-json-pointer-syntax flags.
  bool only_parse_dont_output;
  bool strict_json_pointer_syntax;

  // Set by -d or -max-output-depth. parse_flags defaults it to 0xFFFFFFFF.
  uint32_t max_output_depth;

  // Set by -q or -query. It points into argv and stays NULL if the flag is
  // absent.
  char* query_c_string;
} g_flags = {0};
Nigel Taod0b16cb2020-03-14 10:15:54 +1100230
231std::string //
232parse_flags(int argc, char** argv) {
Nigel Taod60815c2020-03-26 14:32:35 +1100233 g_flags.max_output_depth = 0xFFFFFFFF;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100234
Nigel Tao04126792021-02-22 12:23:57 +1100235#if defined(WUFFS_EXAMPLE_SPEAK_JWCC_NOT_JSON)
236 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
237 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
238 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
239#endif
240
Nigel Taod0b16cb2020-03-14 10:15:54 +1100241 int c = (argc > 0) ? 1 : 0; // Skip argv[0], the program name.
242 for (; c < argc; c++) {
243 char* arg = argv[c];
244 if (*arg++ != '-') {
245 break;
246 }
247
248 // A double-dash "--foo" is equivalent to a single-dash "-foo". As special
249 // cases, a bare "-" is not a flag (some programs may interpret it as
250 // stdin) and a bare "--" means to stop parsing flags.
251 if (*arg == '\x00') {
252 break;
253 } else if (*arg == '-') {
254 arg++;
255 if (*arg == '\x00') {
256 c++;
257 break;
258 }
259 }
260
Nigel Tao94440cf2020-04-02 22:28:24 +1100261 if (!strcmp(arg, "d") || !strcmp(arg, "max-output-depth")) {
Nigel Taod60815c2020-03-26 14:32:35 +1100262 g_flags.max_output_depth = 1;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100263 continue;
Nigel Tao94440cf2020-04-02 22:28:24 +1100264 } else if (!strncmp(arg, "d=", 2) ||
Nigel Taod0b16cb2020-03-14 10:15:54 +1100265 !strncmp(arg, "max-output-depth=", 16)) {
266 while (*arg++ != '=') {
267 }
268 wuffs_base__result_u64 u = wuffs_base__parse_number_u64(
Nigel Tao6b7ce302020-07-07 16:19:46 +1000269 wuffs_base__make_slice_u8((uint8_t*)arg, strlen(arg)),
270 WUFFS_BASE__PARSE_NUMBER_XXX__DEFAULT_OPTIONS);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100271 if (wuffs_base__status__is_ok(&u.status) && (u.value <= 0xFFFFFFFF)) {
Nigel Taod60815c2020-03-26 14:32:35 +1100272 g_flags.max_output_depth = (uint32_t)(u.value);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100273 continue;
274 }
Nigel Taod60815c2020-03-26 14:32:35 +1100275 return g_usage;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100276 }
Nigel Tao0a5940d2020-08-07 13:15:41 +1000277 if (!strcmp(arg, "input-allow-comments")) {
278 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
279 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
280 continue;
281 }
282 if (!strcmp(arg, "input-allow-extra-comma")) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000283 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
Nigel Taoc766bb72020-07-09 12:59:32 +1000284 continue;
285 }
Nigel Tao0a5940d2020-08-07 13:15:41 +1000286 if (!strcmp(arg, "input-allow-inf-nan-numbers")) {
287 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS);
288 continue;
289 }
Nigel Tao04126792021-02-22 12:23:57 +1100290 if (!strcmp(arg, "input-jwcc") || !strcmp(arg, "jwcc")) {
291 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
292 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
293 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
294 continue;
295 }
Nigel Tao8aac6762020-08-12 22:47:45 +1000296 if (!strncmp(arg, "q=", 2) || !strncmp(arg, "query=", 6)) {
297 while (*arg++ != '=') {
298 }
299 g_flags.query_c_string = arg;
300 continue;
301 }
Nigel Tao8d779ae2020-08-29 23:10:08 +1000302 if (!strcmp(arg, "only-parse-dont-output")) {
303 g_flags.only_parse_dont_output = true;
304 continue;
305 }
Nigel Taoecadf722020-07-13 08:22:34 +1000306 if (!strcmp(arg, "strict-json-pointer-syntax")) {
Nigel Taod60815c2020-03-26 14:32:35 +1100307 g_flags.strict_json_pointer_syntax = true;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100308 continue;
309 }
310
Nigel Taod60815c2020-03-26 14:32:35 +1100311 return g_usage;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100312 }
313
Nigel Taod60815c2020-03-26 14:32:35 +1100314 g_flags.remaining_argc = argc - c;
315 g_flags.remaining_argv = argv + c;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100316 return "";
317}
318
Nigel Tao6b7ce302020-07-07 16:19:46 +1000319// ----
Nigel Taod0b16cb2020-03-14 10:15:54 +1100320
struct JsonValue;

// A JSON array: an ordered sequence of values.
using JsonVector = std::vector<JsonValue>;

// A JSON object: values keyed by string, iterated in sorted key order.
//
// We use a std::map in this example program to avoid dependencies outside of
// the C++ standard library. If you're copy/pasting this JsonValue code,
// consider a more efficient data structure such as an absl::btree_map.
//
// See CppCon 2014: Chandler Carruth "Efficiency with Algorithms, Performance
// with Data Structures" at https://www.youtube.com/watch?v=fHNmRkzxHWs
using JsonMap = std::map<std::string, JsonValue>;

// The alternatives are, in order: null (std::monostate), boolean, integer,
// floating point number, string, array and object.
using JsonVariant = std::variant<std::monostate,
                                 bool,
                                 int64_t,
                                 double,
                                 std::string,
                                 JsonVector,
                                 JsonMap>;

// JsonValue is one node of the in-memory JSON tree. It is a JsonVariant with
// one constructor per alternative, so that each call site states explicitly
// which alternative it is building.
struct JsonValue : JsonVariant {
  JsonValue() : JsonVariant() {}
  JsonValue(bool x) : JsonVariant(x) {}
  JsonValue(int64_t x) : JsonVariant(x) {}
  JsonValue(double x) : JsonVariant(x) {}
  // x is an lvalue inside this constructor even though it is declared as an
  // rvalue reference, so it has to be moved explicitly. Otherwise the string
  // would be copied.
  JsonValue(std::string&& x) : JsonVariant(std::move(x)) {}
  // The pointer arguments below are never dereferenced. Only their static
  // type matters: it selects between an empty array and an empty object.
  JsonValue(JsonVector* /*ignored*/) : JsonVariant(JsonVector()) {}
  JsonValue(JsonMap* /*ignored*/) : JsonVariant(JsonMap()) {}
};
350
Nigel Taod0b16cb2020-03-14 10:15:54 +1100351// ----
352
// escape_needed returns whether s contains any byte that the escape function
// would rewrite: '~', '/', New Line, Carriage Return or Horizontal Tab.
bool  //
escape_needed(const std::string& s) {
  return s.find_first_of("~/\n\r\t") != std::string::npos;
}
Nigel Taod0b16cb2020-03-14 10:15:54 +1100362
Nigel Tao9d699392020-08-29 10:56:37 +1000363std::string //
364escape(const std::string& s) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100365 std::string e;
366 e.reserve(8 + s.length());
Nigel Tao9d699392020-08-29 10:56:37 +1000367 for (const char& c : s) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100368 switch (c) {
369 case '~':
370 e += "~0";
371 break;
372 case '/':
373 e += "~1";
374 break;
375 case '\n':
Nigel Taod60815c2020-03-26 14:32:35 +1100376 if (g_flags.strict_json_pointer_syntax) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100377 return "";
378 }
379 e += "~n";
380 break;
381 case '\r':
Nigel Taod60815c2020-03-26 14:32:35 +1100382 if (g_flags.strict_json_pointer_syntax) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100383 return "";
384 }
385 e += "~r";
386 break;
Nigel Tao904004e2020-11-15 20:56:04 +1100387 case '\t':
388 if (g_flags.strict_json_pointer_syntax) {
389 return "";
390 }
391 e += "~t";
392 break;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100393 default:
394 e += c;
395 break;
396 }
397 }
398 return e;
399}
400
401std::string //
Nigel Tao5396dbd2020-08-29 22:02:35 +1000402print_json_pointers(JsonValue& jvalue, uint32_t depth) {
Nigel Tao8098d962020-08-29 10:41:05 +1000403 std::cout << g_dst << '\n';
Nigel Taod60815c2020-03-26 14:32:35 +1100404 if (depth++ >= g_flags.max_output_depth) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100405 return "";
406 }
407
Nigel Tao8098d962020-08-29 10:41:05 +1000408 size_t n = g_dst.size();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000409 if (std::holds_alternative<JsonVector>(jvalue)) {
410 JsonVector& jvector = std::get<JsonVector>(jvalue);
411 g_dst += "/";
412 for (size_t i = 0; i < jvector.size(); i++) {
413 if (i >= g_to_string_cache.size()) {
414 g_to_string_cache.push_back(std::to_string(i));
Nigel Taod0b16cb2020-03-14 10:15:54 +1100415 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000416 g_dst += g_to_string_cache[i];
417 TRY(print_json_pointers(jvector[i], depth));
418 g_dst.resize(n + 1);
419 }
420 g_dst.resize(n);
421 } else if (std::holds_alternative<JsonMap>(jvalue)) {
422 g_dst += "/";
423 for (auto& kv : std::get<JsonMap>(jvalue)) {
424 if (!escape_needed(kv.first)) {
425 g_dst += kv.first;
426 } else {
427 std::string e = escape(kv.first);
428 if (e.empty()) {
Nigel Tao904004e2020-11-15 20:56:04 +1100429 return "main: unsupported \"\\u0009\", \"\\u000A\" or \"\\u000D\" in "
430 "object key";
Nigel Taod0b16cb2020-03-14 10:15:54 +1100431 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000432 g_dst += e;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100433 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000434 TRY(print_json_pointers(kv.second, depth));
435 g_dst.resize(n + 1);
436 }
437 g_dst.resize(n);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100438 }
439 return "";
440}
441
Nigel Taocf6c5782020-08-03 23:43:45 +1000442// ----
443
444class Callbacks : public wuffs_aux::DecodeJsonCallbacks {
445 public:
446 struct Entry {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000447 Entry(JsonValue&& jvalue_arg)
448 : jvalue(std::move(jvalue_arg)), has_map_key(false), map_key() {}
Nigel Taocf6c5782020-08-03 23:43:45 +1000449
Nigel Tao5396dbd2020-08-29 22:02:35 +1000450 JsonValue jvalue;
Nigel Taocf6c5782020-08-03 23:43:45 +1000451 bool has_map_key;
452 std::string map_key;
453 };
454
455 Callbacks() = default;
456
Nigel Tao5396dbd2020-08-29 22:02:35 +1000457 std::string Append(JsonValue&& jvalue) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000458 if (m_stack.empty()) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000459 m_stack.push_back(Entry(std::move(jvalue)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000460 return "";
461 }
462 Entry& top = m_stack.back();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000463 if (std::holds_alternative<JsonVector>(top.jvalue)) {
464 std::get<JsonVector>(top.jvalue).push_back(std::move(jvalue));
465 return "";
466 } else if (std::holds_alternative<JsonMap>(top.jvalue)) {
467 JsonMap& jmap = std::get<JsonMap>(top.jvalue);
468 if (top.has_map_key) {
469 top.has_map_key = false;
470 auto iter = jmap.find(top.map_key);
471 if (iter != jmap.end()) {
472 return "main: duplicate key: " + top.map_key;
Nigel Taocf6c5782020-08-03 23:43:45 +1000473 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000474 jmap.insert(iter, JsonMap::value_type(std::move(top.map_key),
475 std::move(jvalue)));
476 return "";
477 } else if (std::holds_alternative<std::string>(jvalue)) {
478 top.has_map_key = true;
479 top.map_key = std::move(std::get<std::string>(jvalue));
480 return "";
481 }
482 return "main: internal error: non-string map key";
483 } else {
484 return "main: internal error: non-container stack entry";
Nigel Taocf6c5782020-08-03 23:43:45 +1000485 }
Nigel Taocf6c5782020-08-03 23:43:45 +1000486 }
487
Nigel Tao5396dbd2020-08-29 22:02:35 +1000488 std::string AppendNull() override { return Append(JsonValue()); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000489
Nigel Tao5396dbd2020-08-29 22:02:35 +1000490 std::string AppendBool(bool val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000491
Nigel Tao5396dbd2020-08-29 22:02:35 +1000492 std::string AppendI64(int64_t val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000493
Nigel Tao5396dbd2020-08-29 22:02:35 +1000494 std::string AppendF64(double val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000495
Nigel Taoca5da1f2020-08-10 15:26:29 +1000496 std::string AppendTextString(std::string&& val) override {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000497 return Append(JsonValue(std::move(val)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000498 }
499
Nigel Taoca5da1f2020-08-10 15:26:29 +1000500 std::string Push(uint32_t flags) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000501 if (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000502 m_stack.push_back(JsonValue(static_cast<JsonVector*>(nullptr)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000503 return "";
504 } else if (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000505 m_stack.push_back(JsonValue(static_cast<JsonMap*>(nullptr)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000506 return "";
507 }
508 return "main: internal error: bad push";
509 }
510
Nigel Taoca5da1f2020-08-10 15:26:29 +1000511 std::string Pop(uint32_t flags) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000512 if (m_stack.empty()) {
513 return "main: internal error: bad pop";
514 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000515 JsonValue jvalue = std::move(m_stack.back().jvalue);
Nigel Taocf6c5782020-08-03 23:43:45 +1000516 m_stack.pop_back();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000517 return Append(std::move(jvalue));
Nigel Taocf6c5782020-08-03 23:43:45 +1000518 }
519
Nigel Taoca5da1f2020-08-10 15:26:29 +1000520 void Done(wuffs_aux::DecodeJsonResult& result,
521 wuffs_aux::sync_io::Input& input,
522 wuffs_aux::IOBuffer& buffer) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000523 if (!result.error_message.empty()) {
524 return;
525 } else if (m_stack.size() != 1) {
526 result.error_message = "main: internal error: bad depth";
527 return;
Nigel Tao8d779ae2020-08-29 23:10:08 +1000528 } else if (!g_flags.only_parse_dont_output) {
529 result.error_message = print_json_pointers(m_stack.back().jvalue, 0);
Nigel Taocf6c5782020-08-03 23:43:45 +1000530 }
Nigel Taocf6c5782020-08-03 23:43:45 +1000531 }
532
533 private:
534 std::vector<Entry> m_stack;
535};
536
537// ----
538
Nigel Taod0b16cb2020-03-14 10:15:54 +1100539std::string //
540main1(int argc, char** argv) {
541 TRY(parse_flags(argc, argv));
Nigel Taob3438432020-08-13 00:06:56 +1000542 if (!g_flags.strict_json_pointer_syntax) {
Nigel Tao904004e2020-11-15 20:56:04 +1100543 g_quirks.push_back(
544 WUFFS_JSON__QUIRK_JSON_POINTER_ALLOW_TILDE_N_TILDE_R_TILDE_T);
Nigel Taob3438432020-08-13 00:06:56 +1000545 }
Nigel Taod0b16cb2020-03-14 10:15:54 +1100546
Nigel Taocf6c5782020-08-03 23:43:45 +1000547 FILE* in = stdin;
Nigel Taod60815c2020-03-26 14:32:35 +1100548 if (g_flags.remaining_argc > 1) {
549 return g_usage;
550 } else if (g_flags.remaining_argc == 1) {
Nigel Tao816475b2021-07-07 20:28:35 +1000551 in = fopen(g_flags.remaining_argv[0], "rb");
Nigel Taocf6c5782020-08-03 23:43:45 +1000552 if (!in) {
553 return std::string("main: cannot read input file");
Nigel Taod0b16cb2020-03-14 10:15:54 +1100554 }
555 }
556
Nigel Tao2742a4f2020-08-17 00:02:49 +1000557 Callbacks callbacks;
558 wuffs_aux::sync_io::FileInput input(in);
Nigel Taocf6c5782020-08-03 23:43:45 +1000559 return wuffs_aux::DecodeJson(
Nigel Tao2742a4f2020-08-17 00:02:49 +1000560 callbacks, input,
Nigel Tao8aac6762020-08-12 22:47:45 +1000561 wuffs_base__make_slice_u32(g_quirks.data(), g_quirks.size()),
562 (g_flags.query_c_string ? g_flags.query_c_string : ""))
Nigel Taocf6c5782020-08-03 23:43:45 +1000563 .error_message;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100564}
565
566// ----
567
// compute_exit_code maps a status message to a process exit code, writing
// any non-empty message (plus a new line) to stderr on the way.
//
//  - 0 means success: the message is empty.
//  - 1 means a regular (foreseen) error, e.g. badly formatted or unsupported
//    input.
//  - 2 means an internal (exceptional) error, e.g. defensive run-time checks
//    found that an internal invariant did not hold. Such messages contain
//    the text "internal error:".
//
// Automated testing, including badly formatted inputs, can therefore
// discriminate between expected failure (exit code 1) and unexpected failure
// (other non-zero exit codes). Specifically, exit code 2 for internal
// invariant violation, exit code 139 (which is 128 + SIGSEGV on x86_64
// linux) for a segmentation fault (e.g. null pointer dereference).
int  //
compute_exit_code(std::string status_msg) {
  if (status_msg.empty()) {
    return 0;
  }
  std::cerr << status_msg << '\n';

  const bool is_internal =
      status_msg.find("internal error:") != std::string::npos;
  if (is_internal) {
    return 2;
  }
  return 1;
}
587
588int //
589main(int argc, char** argv) {
590 std::string z = main1(argc, argv);
591 int exit_code = compute_exit_code(z);
592 return exit_code;
593}