blob: 94dc95cf05b86ccadd30b81d8706d1e33094dcf7 [file] [log] [blame]
Nigel Taod0b16cb2020-03-14 10:15:54 +11001// Copyright 2020 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// ----------------
16
17/*
18jsonfindptrs reads UTF-8 JSON from stdin and writes every node's JSON Pointer
19(RFC 6901) to stdout.
20
Nigel Taod60815c2020-03-26 14:32:35 +110021See the "const char* g_usage" string below for details.
Nigel Taod0b16cb2020-03-14 10:15:54 +110022
23----
24
25This program uses Wuffs' JSON decoder at a relatively high level, building
26in-memory representations of JSON 'things' (e.g. numbers, strings, objects).
27After the entire input has been converted, walking the tree prints the output
Nigel Taocf6c5782020-08-03 23:43:45 +100028(in sorted order). The wuffs_aux::DecodeJson library function converts the
29lower level token stream to higher level callbacks. This .cc file deals only
30with those callbacks, not with tokens per se.
Nigel Taod0b16cb2020-03-14 10:15:54 +110031
32This approach is centered around JSON things. Each JSON thing comprises one or
33more JSON tokens.
34
35An alternative, lower-level approach is in the sibling example/jsonptr program.
36Neither approach is better or worse per se, but when studying this program, be
37aware that there are multiple ways to use Wuffs' JSON decoder.
38
39The two programs, jsonfindptrs and jsonptr, also demonstrate different
40trade-offs with regard to JSON object duplicate keys. The JSON spec permits
41different implementations to allow or reject duplicate keys. It is not always
42clear which approach is safer. Rejecting them is certainly unambiguous, and
43security bugs can lurk in ambiguous corners of a file format, if two different
44implementations both silently accept a file but differ on how to interpret it.
45On the other hand, in the worst case, detecting duplicate keys requires O(N)
46memory, where N is the size of the (potentially untrusted) input.
47
48This program (jsonfindptrs) rejects duplicate keys.
49
50----
51
Nigel Tao50bfab92020-08-05 11:39:09 +100052To run:
Nigel Taod0b16cb2020-03-14 10:15:54 +110053
54$CXX jsonfindptrs.cc && ./a.out < ../../test/data/github-tags.json; rm -f a.out
55
56for a C++ compiler $CXX, such as clang++ or g++.
57*/
58
Nigel Tao5396dbd2020-08-29 22:02:35 +100059#if defined(__cplusplus) && (__cplusplus < 201703L)
60#error "This C++ program requires -std=c++17 or later"
Nigel Tao721190a2020-04-03 22:25:21 +110061#endif
62
Nigel Taocf6c5782020-08-03 23:43:45 +100063#include <stdio.h>
Nigel Tao6b7ce302020-07-07 16:19:46 +100064
Nigel Taod0b16cb2020-03-14 10:15:54 +110065#include <iostream>
66#include <map>
67#include <string>
68#include <vector>
69
Nigel Tao5396dbd2020-08-29 22:02:35 +100070// <variant> requires C++17.
71#include <variant>
72
Nigel Taod0b16cb2020-03-14 10:15:54 +110073// Wuffs ships as a "single file C library" or "header file library" as per
74// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
75//
76// To use that single file as a "foo.c"-like implementation, instead of a
77// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
78// compiling it.
79#define WUFFS_IMPLEMENTATION
80
// Defining the WUFFS_CONFIG__MODULE* macros is optional, but it lets users of
// release/c/etc.c whitelist which parts of Wuffs to build. That file contains
// the entire Wuffs standard library, implementing a variety of codecs and file
// formats. Without this macro definition, an optimizing compiler or linker may
// very well discard Wuffs code for unused codecs, but listing the Wuffs
// modules we use makes that process explicit. Preprocessing means that such
// code simply isn't compiled.
88#define WUFFS_CONFIG__MODULES
Nigel Taocf6c5782020-08-03 23:43:45 +100089#define WUFFS_CONFIG__MODULE__AUX__BASE
90#define WUFFS_CONFIG__MODULE__AUX__JSON
Nigel Taod0b16cb2020-03-14 10:15:54 +110091#define WUFFS_CONFIG__MODULE__BASE
92#define WUFFS_CONFIG__MODULE__JSON
93
94// If building this program in an environment that doesn't easily accommodate
95// relative includes, you can use the script/inline-c-relative-includes.go
96// program to generate a stand-alone C++ file.
97#include "../../release/c/wuffs-unsupported-snapshot.c"
98
// TRY evaluates error_msg (an expression convertible to std::string) exactly
// once. If the resulting string is non-empty, it is returned from the
// enclosing function. An empty string means success and execution continues
// after the macro.
#define TRY(error_msg)                     \
  do {                                     \
    std::string try_status = (error_msg);  \
    if (try_status.empty()) {              \
      break;                               \
    }                                      \
    return try_status;                     \
  } while (false)
106
// g_usage is the help text. It is returned (as an error message) by
// parse_flags and main1 whenever the command line is malformed.
static const char* g_usage =
    "Usage: jsonfindptrs -flags input.json\n"
    "\n"
    "Flags:\n"
    " -d=NUM -max-output-depth=NUM\n"
    " -q=STR -query=STR\n"
    " -input-allow-comments\n"
    " -input-allow-extra-comma\n"
    " -input-allow-inf-nan-numbers\n"
    " -strict-json-pointer-syntax\n"
    "\n"
    "The input.json filename is optional. If absent, it reads from stdin.\n"
    "\n"
    "----\n"
    "\n"
    "jsonfindptrs reads UTF-8 JSON from stdin and writes every node's JSON\n"
    "Pointer (RFC 6901) to stdout.\n"
    "\n"
    "For example, given RFC 6901 section 5's sample input\n"
    "(https://tools.ietf.org/rfc/rfc6901.txt), this command:\n"
    " jsonfindptrs rfc-6901-json-pointer.json\n"
    "will print:\n"
    " \n"
    " /\n"
    " / \n"
    " /a~1b\n"
    " /c%d\n"
    " /e^f\n"
    " /foo\n"
    " /foo/0\n"
    " /foo/1\n"
    " /g|h\n"
    " /i\\j\n"
    " /k\"l\n"
    " /m~0n\n"
    "\n"
    "The first three lines are (1) a 0-byte \"\", (2) a 1-byte \"/\" and (3)\n"
    "a 2-byte \"/ \". Unlike a file system, the \"/\" JSON Pointer does not\n"
    "identify the root. Instead, \"\" is the root and \"/\" is the child (the\n"
    "value in a key-value pair) of the root whose key is the empty string.\n"
    "Similarly, \"/xyz\" and \"/xyz/\" are two different nodes.\n"
    "\n"
    "----\n"
    "\n"
    "The JSON specification (https://json.org/) permits implementations that\n"
    "allow duplicate keys, but this one does not. Conversely, it prints keys\n"
    "in sorted order, but the overall output is not necessarily sorted\n"
    "lexicographically. For example, \"/a/9\" would come before \"/a/10\",\n"
    "and \"/b/c\", a child of \"/b\", would come before \"/b+\".\n"
    "\n"
    "This JSON implementation also rejects integer values outside ±M, where\n"
    "M is ((1<<53)-1), also known as JavaScript's Number.MAX_SAFE_INTEGER.\n"
    "\n"
    "The -input-allow-comments flag allows \"/*slash-star*/\" and\n"
    "\"//slash-slash\" C-style comments within JSON input.\n"
    "\n"
    "The -input-allow-extra-comma flag allows input like \"[1,2,]\", with a\n"
    "comma after the final element of a JSON list or dictionary.\n"
    "\n"
    "The -input-allow-inf-nan-numbers flag allows non-finite floating point\n"
    "numbers (infinities and not-a-numbers) within JSON input.\n"
    "\n"
    "----\n"
    "\n"
    "The -strict-json-pointer-syntax flag restricts the output lines to\n"
    "exactly RFC 6901, with only two escape sequences: \"~0\" and \"~1\" for\n"
    "\"~\" and \"/\". Without this flag, this program also lets \"~n\" and\n"
    "\"~r\" escape the New Line and Carriage Return ASCII control characters,\n"
    "which can work better with line oriented Unix tools that assume exactly\n"
    "one value (i.e. one JSON Pointer string) per line. With this flag, it\n"
    "fails if the input JSON's keys contain \"\\u000A\" or \"\\u000D\".\n"
    "\n"
    "----\n"
    "\n"
    "The JSON specification permits implementations to set their own maximum\n"
    "input depth. This JSON implementation sets it to 1024.\n"
    "\n"
    "The -d=NUM or -max-output-depth=NUM flag gives the maximum (inclusive)\n"
    "output depth. JSON containers ([] arrays and {} objects) can hold other\n"
    "containers. A bare -d or -max-output-depth is equivalent to -d=1,\n"
    "analogous to the Unix ls command. The flag's absence is equivalent to an\n"
    "unlimited output depth, analogous to the Unix find command (and hence\n"
    "the name of this program: jsonfindptrs).";
190
191// ----
192
// g_quirks collects the WUFFS_JSON__QUIRK_* values enabled by command line
// flags. It is later handed to wuffs_aux::DecodeJson.
std::vector<uint32_t> g_quirks;

// g_dst holds the JSON Pointer of the node currently being printed. It grows
// and shrinks as print_json_pointers walks down and back up the tree.
std::string g_dst;

// g_to_string_cache[i] caches the result of std::to_string(i).
std::vector<std::string> g_to_string_cache;
199
// g_flags holds the parsed command line. Every field starts out zeroed;
// parse_flags fills it in.
struct {
  // The arguments left over after the leading "-flag" arguments.
  int remaining_argc;
  char** remaining_argv;

  // Set by the -strict-json-pointer-syntax flag.
  bool strict_json_pointer_syntax;

  // Set by the -d / -max-output-depth flag.
  uint32_t max_output_depth;

  // Set by the -q / -query flag. It points into argv (or is null if the flag
  // was not given).
  char* query_c_string;
} g_flags = {0, nullptr, false, 0, nullptr};
Nigel Taod0b16cb2020-03-14 10:15:54 +1100210
211std::string //
212parse_flags(int argc, char** argv) {
Nigel Taod60815c2020-03-26 14:32:35 +1100213 g_flags.max_output_depth = 0xFFFFFFFF;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100214
215 int c = (argc > 0) ? 1 : 0; // Skip argv[0], the program name.
216 for (; c < argc; c++) {
217 char* arg = argv[c];
218 if (*arg++ != '-') {
219 break;
220 }
221
222 // A double-dash "--foo" is equivalent to a single-dash "-foo". As special
223 // cases, a bare "-" is not a flag (some programs may interpret it as
224 // stdin) and a bare "--" means to stop parsing flags.
225 if (*arg == '\x00') {
226 break;
227 } else if (*arg == '-') {
228 arg++;
229 if (*arg == '\x00') {
230 c++;
231 break;
232 }
233 }
234
Nigel Tao94440cf2020-04-02 22:28:24 +1100235 if (!strcmp(arg, "d") || !strcmp(arg, "max-output-depth")) {
Nigel Taod60815c2020-03-26 14:32:35 +1100236 g_flags.max_output_depth = 1;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100237 continue;
Nigel Tao94440cf2020-04-02 22:28:24 +1100238 } else if (!strncmp(arg, "d=", 2) ||
Nigel Taod0b16cb2020-03-14 10:15:54 +1100239 !strncmp(arg, "max-output-depth=", 16)) {
240 while (*arg++ != '=') {
241 }
242 wuffs_base__result_u64 u = wuffs_base__parse_number_u64(
Nigel Tao6b7ce302020-07-07 16:19:46 +1000243 wuffs_base__make_slice_u8((uint8_t*)arg, strlen(arg)),
244 WUFFS_BASE__PARSE_NUMBER_XXX__DEFAULT_OPTIONS);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100245 if (wuffs_base__status__is_ok(&u.status) && (u.value <= 0xFFFFFFFF)) {
Nigel Taod60815c2020-03-26 14:32:35 +1100246 g_flags.max_output_depth = (uint32_t)(u.value);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100247 continue;
248 }
Nigel Taod60815c2020-03-26 14:32:35 +1100249 return g_usage;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100250 }
Nigel Tao0a5940d2020-08-07 13:15:41 +1000251 if (!strcmp(arg, "input-allow-comments")) {
252 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
253 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
254 continue;
255 }
256 if (!strcmp(arg, "input-allow-extra-comma")) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000257 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
Nigel Taoc766bb72020-07-09 12:59:32 +1000258 continue;
259 }
Nigel Tao0a5940d2020-08-07 13:15:41 +1000260 if (!strcmp(arg, "input-allow-inf-nan-numbers")) {
261 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS);
262 continue;
263 }
Nigel Tao8aac6762020-08-12 22:47:45 +1000264 if (!strncmp(arg, "q=", 2) || !strncmp(arg, "query=", 6)) {
265 while (*arg++ != '=') {
266 }
267 g_flags.query_c_string = arg;
268 continue;
269 }
Nigel Taoecadf722020-07-13 08:22:34 +1000270 if (!strcmp(arg, "strict-json-pointer-syntax")) {
Nigel Taod60815c2020-03-26 14:32:35 +1100271 g_flags.strict_json_pointer_syntax = true;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100272 continue;
273 }
274
Nigel Taod60815c2020-03-26 14:32:35 +1100275 return g_usage;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100276 }
277
Nigel Taod60815c2020-03-26 14:32:35 +1100278 g_flags.remaining_argc = argc - c;
279 g_flags.remaining_argv = argv + c;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100280 return "";
281}
282
Nigel Tao6b7ce302020-07-07 16:19:46 +1000283// ----
Nigel Taod0b16cb2020-03-14 10:15:54 +1100284
struct JsonValue;

using JsonVector = std::vector<JsonValue>;

// We use a std::map in this example program to avoid dependencies outside of
// the C++ standard library. If you're copy/pasting this JsonValue code,
// consider a more efficient data structure such as an absl::btree_map.
//
// See CppCon 2014: Chandler Carruth "Efficiency with Algorithms, Performance
// with Data Structures" at https://www.youtube.com/watch?v=fHNmRkzxHWs
using JsonMap = std::map<std::string, JsonValue>;

// JsonVariant's alternatives are, in order: null (std::monostate), boolean,
// integer, floating point, string, array and object.
using JsonVariant = std::variant<std::monostate,
                                 bool,
                                 int64_t,
                                 double,
                                 std::string,
                                 JsonVector,
                                 JsonMap>;

// JsonValue is an in-memory JSON thing. It is a thin wrapper around
// JsonVariant, so std::holds_alternative and std::get apply to it directly.
struct JsonValue : JsonVariant {
  // The default constructor makes a JSON null.
  JsonValue() : JsonVariant() {}
  JsonValue(bool x) : JsonVariant(x) {}
  JsonValue(int64_t x) : JsonVariant(x) {}
  JsonValue(double x) : JsonVariant(x) {}
  // A named rvalue reference is an lvalue, so std::move is needed to move
  // (instead of copy) the string's contents into the variant.
  JsonValue(std::string&& x) : JsonVariant(std::move(x)) {}
  // The pointer arguments are tags that only select the container type. Their
  // values are ignored and the resulting container is empty.
  JsonValue(JsonVector* ignored) : JsonVariant(JsonVector()) {}
  JsonValue(JsonMap* ignored) : JsonVariant(JsonMap()) {}
};
314
Nigel Taod0b16cb2020-03-14 10:15:54 +1100315// ----
316
// escape_needed returns whether s contains any of the four bytes ('~', '/',
// New Line or Carriage Return) that the escape function treats specially.
bool  //
escape_needed(const std::string& s) {
  return s.find_first_of("~/\n\r") != std::string::npos;
}
Nigel Taod0b16cb2020-03-14 10:15:54 +1100326
Nigel Tao9d699392020-08-29 10:56:37 +1000327std::string //
328escape(const std::string& s) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100329 std::string e;
330 e.reserve(8 + s.length());
Nigel Tao9d699392020-08-29 10:56:37 +1000331 for (const char& c : s) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100332 switch (c) {
333 case '~':
334 e += "~0";
335 break;
336 case '/':
337 e += "~1";
338 break;
339 case '\n':
Nigel Taod60815c2020-03-26 14:32:35 +1100340 if (g_flags.strict_json_pointer_syntax) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100341 return "";
342 }
343 e += "~n";
344 break;
345 case '\r':
Nigel Taod60815c2020-03-26 14:32:35 +1100346 if (g_flags.strict_json_pointer_syntax) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100347 return "";
348 }
349 e += "~r";
350 break;
351 default:
352 e += c;
353 break;
354 }
355 }
356 return e;
357}
358
359std::string //
Nigel Tao5396dbd2020-08-29 22:02:35 +1000360print_json_pointers(JsonValue& jvalue, uint32_t depth) {
Nigel Tao8098d962020-08-29 10:41:05 +1000361 std::cout << g_dst << '\n';
Nigel Taod60815c2020-03-26 14:32:35 +1100362 if (depth++ >= g_flags.max_output_depth) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100363 return "";
364 }
365
Nigel Tao8098d962020-08-29 10:41:05 +1000366 size_t n = g_dst.size();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000367 if (std::holds_alternative<JsonVector>(jvalue)) {
368 JsonVector& jvector = std::get<JsonVector>(jvalue);
369 g_dst += "/";
370 for (size_t i = 0; i < jvector.size(); i++) {
371 if (i >= g_to_string_cache.size()) {
372 g_to_string_cache.push_back(std::to_string(i));
Nigel Taod0b16cb2020-03-14 10:15:54 +1100373 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000374 g_dst += g_to_string_cache[i];
375 TRY(print_json_pointers(jvector[i], depth));
376 g_dst.resize(n + 1);
377 }
378 g_dst.resize(n);
379 } else if (std::holds_alternative<JsonMap>(jvalue)) {
380 g_dst += "/";
381 for (auto& kv : std::get<JsonMap>(jvalue)) {
382 if (!escape_needed(kv.first)) {
383 g_dst += kv.first;
384 } else {
385 std::string e = escape(kv.first);
386 if (e.empty()) {
387 return "main: unsupported \"\\u000A\" or \"\\u000D\" in object key";
Nigel Taod0b16cb2020-03-14 10:15:54 +1100388 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000389 g_dst += e;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100390 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000391 TRY(print_json_pointers(kv.second, depth));
392 g_dst.resize(n + 1);
393 }
394 g_dst.resize(n);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100395 }
396 return "";
397}
398
Nigel Taocf6c5782020-08-03 23:43:45 +1000399// ----
400
401class Callbacks : public wuffs_aux::DecodeJsonCallbacks {
402 public:
403 struct Entry {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000404 Entry(JsonValue&& jvalue_arg)
405 : jvalue(std::move(jvalue_arg)), has_map_key(false), map_key() {}
Nigel Taocf6c5782020-08-03 23:43:45 +1000406
Nigel Tao5396dbd2020-08-29 22:02:35 +1000407 JsonValue jvalue;
Nigel Taocf6c5782020-08-03 23:43:45 +1000408 bool has_map_key;
409 std::string map_key;
410 };
411
412 Callbacks() = default;
413
Nigel Tao5396dbd2020-08-29 22:02:35 +1000414 std::string Append(JsonValue&& jvalue) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000415 if (m_stack.empty()) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000416 m_stack.push_back(Entry(std::move(jvalue)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000417 return "";
418 }
419 Entry& top = m_stack.back();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000420 if (std::holds_alternative<JsonVector>(top.jvalue)) {
421 std::get<JsonVector>(top.jvalue).push_back(std::move(jvalue));
422 return "";
423 } else if (std::holds_alternative<JsonMap>(top.jvalue)) {
424 JsonMap& jmap = std::get<JsonMap>(top.jvalue);
425 if (top.has_map_key) {
426 top.has_map_key = false;
427 auto iter = jmap.find(top.map_key);
428 if (iter != jmap.end()) {
429 return "main: duplicate key: " + top.map_key;
Nigel Taocf6c5782020-08-03 23:43:45 +1000430 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000431 jmap.insert(iter, JsonMap::value_type(std::move(top.map_key),
432 std::move(jvalue)));
433 return "";
434 } else if (std::holds_alternative<std::string>(jvalue)) {
435 top.has_map_key = true;
436 top.map_key = std::move(std::get<std::string>(jvalue));
437 return "";
438 }
439 return "main: internal error: non-string map key";
440 } else {
441 return "main: internal error: non-container stack entry";
Nigel Taocf6c5782020-08-03 23:43:45 +1000442 }
Nigel Taocf6c5782020-08-03 23:43:45 +1000443 }
444
Nigel Tao5396dbd2020-08-29 22:02:35 +1000445 std::string AppendNull() override { return Append(JsonValue()); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000446
Nigel Tao5396dbd2020-08-29 22:02:35 +1000447 std::string AppendBool(bool val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000448
Nigel Tao5396dbd2020-08-29 22:02:35 +1000449 std::string AppendI64(int64_t val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000450
Nigel Tao5396dbd2020-08-29 22:02:35 +1000451 std::string AppendF64(double val) override { return Append(JsonValue(val)); }
Nigel Taocf6c5782020-08-03 23:43:45 +1000452
Nigel Taoca5da1f2020-08-10 15:26:29 +1000453 std::string AppendTextString(std::string&& val) override {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000454 return Append(JsonValue(std::move(val)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000455 }
456
Nigel Taoca5da1f2020-08-10 15:26:29 +1000457 std::string Push(uint32_t flags) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000458 if (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000459 m_stack.push_back(JsonValue(static_cast<JsonVector*>(nullptr)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000460 return "";
461 } else if (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
Nigel Tao5396dbd2020-08-29 22:02:35 +1000462 m_stack.push_back(JsonValue(static_cast<JsonMap*>(nullptr)));
Nigel Taocf6c5782020-08-03 23:43:45 +1000463 return "";
464 }
465 return "main: internal error: bad push";
466 }
467
Nigel Taoca5da1f2020-08-10 15:26:29 +1000468 std::string Pop(uint32_t flags) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000469 if (m_stack.empty()) {
470 return "main: internal error: bad pop";
471 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000472 JsonValue jvalue = std::move(m_stack.back().jvalue);
Nigel Taocf6c5782020-08-03 23:43:45 +1000473 m_stack.pop_back();
Nigel Tao5396dbd2020-08-29 22:02:35 +1000474 return Append(std::move(jvalue));
Nigel Taocf6c5782020-08-03 23:43:45 +1000475 }
476
Nigel Taoca5da1f2020-08-10 15:26:29 +1000477 void Done(wuffs_aux::DecodeJsonResult& result,
478 wuffs_aux::sync_io::Input& input,
479 wuffs_aux::IOBuffer& buffer) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000480 if (!result.error_message.empty()) {
481 return;
482 } else if (m_stack.size() != 1) {
483 result.error_message = "main: internal error: bad depth";
484 return;
485 }
Nigel Tao5396dbd2020-08-29 22:02:35 +1000486 result.error_message = print_json_pointers(m_stack.back().jvalue, 0);
Nigel Taocf6c5782020-08-03 23:43:45 +1000487 }
488
489 private:
490 std::vector<Entry> m_stack;
491};
492
493// ----
494
Nigel Taod0b16cb2020-03-14 10:15:54 +1100495std::string //
496main1(int argc, char** argv) {
497 TRY(parse_flags(argc, argv));
Nigel Taob3438432020-08-13 00:06:56 +1000498 if (!g_flags.strict_json_pointer_syntax) {
499 g_quirks.push_back(WUFFS_JSON__QUIRK_JSON_POINTER_ALLOW_TILDE_R_TILDE_N);
500 }
Nigel Taod0b16cb2020-03-14 10:15:54 +1100501
Nigel Taocf6c5782020-08-03 23:43:45 +1000502 FILE* in = stdin;
Nigel Taod60815c2020-03-26 14:32:35 +1100503 if (g_flags.remaining_argc > 1) {
504 return g_usage;
505 } else if (g_flags.remaining_argc == 1) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000506 in = fopen(g_flags.remaining_argv[0], "r");
507 if (!in) {
508 return std::string("main: cannot read input file");
Nigel Taod0b16cb2020-03-14 10:15:54 +1100509 }
510 }
511
Nigel Tao2742a4f2020-08-17 00:02:49 +1000512 Callbacks callbacks;
513 wuffs_aux::sync_io::FileInput input(in);
Nigel Taocf6c5782020-08-03 23:43:45 +1000514 return wuffs_aux::DecodeJson(
Nigel Tao2742a4f2020-08-17 00:02:49 +1000515 callbacks, input,
Nigel Tao8aac6762020-08-12 22:47:45 +1000516 wuffs_base__make_slice_u32(g_quirks.data(), g_quirks.size()),
517 (g_flags.query_c_string ? g_flags.query_c_string : ""))
Nigel Taocf6c5782020-08-03 23:43:45 +1000518 .error_message;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100519}
520
521// ----
522
// compute_exit_code maps a status message to a process exit code, printing
// any non-empty message (followed by a new line) to std::cerr.
//
// The exit code is 0 for an empty message (success).
//
// It is 1 for regular (foreseen) errors, e.g. badly formatted or unsupported
// input.
//
// It is 2 for internal (exceptional) errors, e.g. defensive run-time checks
// found that an internal invariant did not hold. These are recognized by the
// message containing "internal error:".
//
// Automated testing, including badly formatted inputs, can therefore
// discriminate between expected failure (exit code 1) and unexpected failure
// (other non-zero exit codes). Specifically, exit code 2 for internal
// invariant violation, exit code 139 (which is 128 + SIGSEGV on x86_64 linux)
// for a segmentation fault (e.g. null pointer dereference).
int  //
compute_exit_code(std::string status_msg) {
  if (!status_msg.empty()) {
    std::cerr << status_msg << '\n';
    const bool is_internal =
        status_msg.find("internal error:") != std::string::npos;
    return is_internal ? 2 : 1;
  }
  return 0;
}
542
543int //
544main(int argc, char** argv) {
545 std::string z = main1(argc, argv);
546 int exit_code = compute_exit_code(z);
547 return exit_code;
548}