blob: 1e03d7f65aba2cbd1f1b75a54a50109baab7a445 [file] [log] [blame]
Nigel Taod0b16cb2020-03-14 10:15:54 +11001// Copyright 2020 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// ----------------
16
17/*
18jsonfindptrs reads UTF-8 JSON from stdin and writes every node's JSON Pointer
19(RFC 6901) to stdout.
20
Nigel Taod60815c2020-03-26 14:32:35 +110021See the "const char* g_usage" string below for details.
Nigel Taod0b16cb2020-03-14 10:15:54 +110022
23----
24
25This program uses Wuffs' JSON decoder at a relatively high level, building
26in-memory representations of JSON 'things' (e.g. numbers, strings, objects).
27After the entire input has been converted, walking the tree prints the output
Nigel Taocf6c5782020-08-03 23:43:45 +100028(in sorted order). The wuffs_aux::DecodeJson library function converts the
29lower level token stream to higher level callbacks. This .cc file deals only
30with those callbacks, not with tokens per se.
Nigel Taod0b16cb2020-03-14 10:15:54 +110031
32This approach is centered around JSON things. Each JSON thing comprises one or
33more JSON tokens.
34
35An alternative, lower-level approach is in the sibling example/jsonptr program.
36Neither approach is better or worse per se, but when studying this program, be
37aware that there are multiple ways to use Wuffs' JSON decoder.
38
39The two programs, jsonfindptrs and jsonptr, also demonstrate different
40trade-offs with regard to JSON object duplicate keys. The JSON spec permits
41different implementations to allow or reject duplicate keys. It is not always
42clear which approach is safer. Rejecting them is certainly unambiguous, and
43security bugs can lurk in ambiguous corners of a file format, if two different
44implementations both silently accept a file but differ on how to interpret it.
45On the other hand, in the worst case, detecting duplicate keys requires O(N)
46memory, where N is the size of the (potentially untrusted) input.
47
48This program (jsonfindptrs) rejects duplicate keys.
49
50----
51
Nigel Tao50bfab92020-08-05 11:39:09 +100052To run:
Nigel Taod0b16cb2020-03-14 10:15:54 +110053
54$CXX jsonfindptrs.cc && ./a.out < ../../test/data/github-tags.json; rm -f a.out
55
56for a C++ compiler $CXX, such as clang++ or g++.
57*/
58
Nigel Tao721190a2020-04-03 22:25:21 +110059#if defined(__cplusplus) && (__cplusplus < 201103L)
60#error "This C++ program requires -std=c++11 or later"
61#endif
62
Nigel Taocf6c5782020-08-03 23:43:45 +100063#include <stdio.h>
Nigel Tao6b7ce302020-07-07 16:19:46 +100064
Nigel Taod0b16cb2020-03-14 10:15:54 +110065#include <iostream>
66#include <map>
67#include <string>
68#include <vector>
69
70// Wuffs ships as a "single file C library" or "header file library" as per
71// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
72//
73// To use that single file as a "foo.c"-like implementation, instead of a
74// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
75// compiling it.
76#define WUFFS_IMPLEMENTATION
77
// Defining the WUFFS_CONFIG__MODULE* macros is optional, but it lets users of
79// release/c/etc.c whitelist which parts of Wuffs to build. That file contains
80// the entire Wuffs standard library, implementing a variety of codecs and file
81// formats. Without this macro definition, an optimizing compiler or linker may
82// very well discard Wuffs code for unused codecs, but listing the Wuffs
83// modules we use makes that process explicit. Preprocessing means that such
84// code simply isn't compiled.
85#define WUFFS_CONFIG__MODULES
Nigel Taocf6c5782020-08-03 23:43:45 +100086#define WUFFS_CONFIG__MODULE__AUX__BASE
87#define WUFFS_CONFIG__MODULE__AUX__JSON
Nigel Taod0b16cb2020-03-14 10:15:54 +110088#define WUFFS_CONFIG__MODULE__BASE
89#define WUFFS_CONFIG__MODULE__JSON
90
91// If building this program in an environment that doesn't easily accommodate
92// relative includes, you can use the script/inline-c-relative-includes.go
93// program to generate a stand-alone C++ file.
94#include "../../release/c/wuffs-unsupported-snapshot.c"
95
// TRY evaluates error_msg (an expression convertible to std::string) once and
// treats a non-empty result as an error message: the enclosing function
// immediately returns that message. An empty result means success and
// execution falls through to the statement after the macro.
//
// The do-while(false) wrapper makes "TRY(x);" behave as a single statement.
#define TRY(error_msg)                  \
  do {                                  \
    std::string try_status = error_msg; \
    if (!try_status.empty()) {          \
      return try_status;                \
    }                                   \
  } while (false)
103
// g_usage is the program's help text. parse_flags and main1 return it as their
// "error message" when the command line is malformed (an unknown flag, a bad
// -d value, or more than one input filename).
//
// NOTE(review): the Flags list below mentions -q=STR / -query=STR but, unlike
// the other flags, the text after it has no paragraph describing that flag.
static const char* g_usage =
    "Usage: jsonfindptrs -flags input.json\n"
    "\n"
    "Flags:\n"
    " -d=NUM -max-output-depth=NUM\n"
    " -q=STR -query=STR\n"
    " -input-allow-comments\n"
    " -input-allow-extra-comma\n"
    " -input-allow-inf-nan-numbers\n"
    " -strict-json-pointer-syntax\n"
    "\n"
    "The input.json filename is optional. If absent, it reads from stdin.\n"
    "\n"
    "----\n"
    "\n"
    "jsonfindptrs reads UTF-8 JSON from stdin and writes every node's JSON\n"
    "Pointer (RFC 6901) to stdout.\n"
    "\n"
    "For example, given RFC 6901 section 5's sample input\n"
    "(https://tools.ietf.org/rfc/rfc6901.txt), this command:\n"
    " jsonfindptrs rfc-6901-json-pointer.json\n"
    "will print:\n"
    " \n"
    " /\n"
    " / \n"
    " /a~1b\n"
    " /c%d\n"
    " /e^f\n"
    " /foo\n"
    " /foo/0\n"
    " /foo/1\n"
    " /g|h\n"
    " /i\\j\n"
    " /k\"l\n"
    " /m~0n\n"
    "\n"
    "The first three lines are (1) a 0-byte \"\", (2) a 1-byte \"/\" and (3)\n"
    "a 2-byte \"/ \". Unlike a file system, the \"/\" JSON Pointer does not\n"
    "identify the root. Instead, \"\" is the root and \"/\" is the child (the\n"
    "value in a key-value pair) of the root whose key is the empty string.\n"
    "Similarly, \"/xyz\" and \"/xyz/\" are two different nodes.\n"
    "\n"
    "----\n"
    "\n"
    "The JSON specification (https://json.org/) permits implementations that\n"
    "allow duplicate keys, but this one does not. Conversely, it prints keys\n"
    "in sorted order, but the overall output is not necessarily sorted\n"
    "lexicographically. For example, \"/a/9\" would come before \"/a/10\",\n"
    "and \"/b/c\", a child of \"/b\", would come before \"/b+\".\n"
    "\n"
    "This JSON implementation also rejects integer values outside ±M, where\n"
    "M is ((1<<53)-1), also known as JavaScript's Number.MAX_SAFE_INTEGER.\n"
    "\n"
    "The -input-allow-comments flag allows \"/*slash-star*/\" and\n"
    "\"//slash-slash\" C-style comments within JSON input.\n"
    "\n"
    "The -input-allow-extra-comma flag allows input like \"[1,2,]\", with a\n"
    "comma after the final element of a JSON list or dictionary.\n"
    "\n"
    "The -input-allow-inf-nan-numbers flag allows non-finite floating point\n"
    "numbers (infinities and not-a-numbers) within JSON input.\n"
    "\n"
    "----\n"
    "\n"
    "The -strict-json-pointer-syntax flag restricts the output lines to\n"
    "exactly RFC 6901, with only two escape sequences: \"~0\" and \"~1\" for\n"
    "\"~\" and \"/\". Without this flag, this program also lets \"~n\" and\n"
    "\"~r\" escape the New Line and Carriage Return ASCII control characters,\n"
    "which can work better with line oriented Unix tools that assume exactly\n"
    "one value (i.e. one JSON Pointer string) per line. With this flag, it\n"
    "fails if the input JSON's keys contain \"\\u000A\" or \"\\u000D\".\n"
    "\n"
    "----\n"
    "\n"
    "The JSON specification permits implementations to set their own maximum\n"
    "input depth. This JSON implementation sets it to 1024.\n"
    "\n"
    "The -d=NUM or -max-output-depth=NUM flag gives the maximum (inclusive)\n"
    "output depth. JSON containers ([] arrays and {} objects) can hold other\n"
    "containers. A bare -d or -max-output-depth is equivalent to -d=1,\n"
    "analogous to the Unix ls command. The flag's absence is equivalent to an\n"
    "unlimited output depth, analogous to the Unix find command (and hence\n"
    "the name of this program: jsonfindptrs).";
187
188// ----
189
// g_quirks collects the Wuffs JSON decoder quirks to enable. It is filled in
// while parsing flags and handed to wuffs_aux::DecodeJson as a slice.
std::vector<uint32_t> g_quirks;

// g_dst is the JSON Pointer currently being built. print_json_pointers
// appends to it before visiting a child and truncates it afterwards, so one
// buffer is reused for the whole tree walk.
std::string g_dst;

// g_to_string_cache[i] caches the result of std::to_string(i).
std::vector<std::string> g_to_string_cache;

// g_flags holds the parsed command line. Every field starts out zero / null /
// false; parse_flags assigns the real values.
struct {
  // The arguments left over once the leading flags have been consumed.
  int remaining_argc;
  char** remaining_argv;

  // Set by -strict-json-pointer-syntax.
  bool strict_json_pointer_syntax;

  // Set by -d / -max-output-depth.
  uint32_t max_output_depth;

  // Set by -q / -query. Points into argv, or is null if the flag was absent.
  char* query_c_string;
} g_flags = {};
Nigel Taod0b16cb2020-03-14 10:15:54 +1100207
208std::string //
209parse_flags(int argc, char** argv) {
Nigel Taod60815c2020-03-26 14:32:35 +1100210 g_flags.max_output_depth = 0xFFFFFFFF;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100211
212 int c = (argc > 0) ? 1 : 0; // Skip argv[0], the program name.
213 for (; c < argc; c++) {
214 char* arg = argv[c];
215 if (*arg++ != '-') {
216 break;
217 }
218
219 // A double-dash "--foo" is equivalent to a single-dash "-foo". As special
220 // cases, a bare "-" is not a flag (some programs may interpret it as
221 // stdin) and a bare "--" means to stop parsing flags.
222 if (*arg == '\x00') {
223 break;
224 } else if (*arg == '-') {
225 arg++;
226 if (*arg == '\x00') {
227 c++;
228 break;
229 }
230 }
231
Nigel Tao94440cf2020-04-02 22:28:24 +1100232 if (!strcmp(arg, "d") || !strcmp(arg, "max-output-depth")) {
Nigel Taod60815c2020-03-26 14:32:35 +1100233 g_flags.max_output_depth = 1;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100234 continue;
Nigel Tao94440cf2020-04-02 22:28:24 +1100235 } else if (!strncmp(arg, "d=", 2) ||
Nigel Taod0b16cb2020-03-14 10:15:54 +1100236 !strncmp(arg, "max-output-depth=", 16)) {
237 while (*arg++ != '=') {
238 }
239 wuffs_base__result_u64 u = wuffs_base__parse_number_u64(
Nigel Tao6b7ce302020-07-07 16:19:46 +1000240 wuffs_base__make_slice_u8((uint8_t*)arg, strlen(arg)),
241 WUFFS_BASE__PARSE_NUMBER_XXX__DEFAULT_OPTIONS);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100242 if (wuffs_base__status__is_ok(&u.status) && (u.value <= 0xFFFFFFFF)) {
Nigel Taod60815c2020-03-26 14:32:35 +1100243 g_flags.max_output_depth = (uint32_t)(u.value);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100244 continue;
245 }
Nigel Taod60815c2020-03-26 14:32:35 +1100246 return g_usage;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100247 }
Nigel Tao0a5940d2020-08-07 13:15:41 +1000248 if (!strcmp(arg, "input-allow-comments")) {
249 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_BLOCK);
250 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_COMMENT_LINE);
251 continue;
252 }
253 if (!strcmp(arg, "input-allow-extra-comma")) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000254 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_EXTRA_COMMA);
Nigel Taoc766bb72020-07-09 12:59:32 +1000255 continue;
256 }
Nigel Tao0a5940d2020-08-07 13:15:41 +1000257 if (!strcmp(arg, "input-allow-inf-nan-numbers")) {
258 g_quirks.push_back(WUFFS_JSON__QUIRK_ALLOW_INF_NAN_NUMBERS);
259 continue;
260 }
Nigel Tao8aac6762020-08-12 22:47:45 +1000261 if (!strncmp(arg, "q=", 2) || !strncmp(arg, "query=", 6)) {
262 while (*arg++ != '=') {
263 }
264 g_flags.query_c_string = arg;
265 continue;
266 }
Nigel Taoecadf722020-07-13 08:22:34 +1000267 if (!strcmp(arg, "strict-json-pointer-syntax")) {
Nigel Taod60815c2020-03-26 14:32:35 +1100268 g_flags.strict_json_pointer_syntax = true;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100269 continue;
270 }
271
Nigel Taod60815c2020-03-26 14:32:35 +1100272 return g_usage;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100273 }
274
Nigel Taod60815c2020-03-26 14:32:35 +1100275 g_flags.remaining_argc = argc - c;
276 g_flags.remaining_argv = argv + c;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100277 return "";
278}
279
Nigel Tao6b7ce302020-07-07 16:19:46 +1000280// ----
Nigel Taod0b16cb2020-03-14 10:15:54 +1100281
// JsonThing is the in-memory form of one JSON value. The kind field says
// which JSON type it is and therefore which field of value is meaningful.
// Arrays and objects hold their children by value, so a JsonThing is a whole
// sub-tree.
class JsonThing {
 public:
  using Vector = std::vector<JsonThing>;

  // We use a std::map in this example program to avoid dependencies outside of
  // the C++ standard library. If you're copy/pasting this JsonThing code,
  // consider a more efficient data structure such as an absl::btree_map.
  //
  // See CppCon 2014: Chandler Carruth "Efficiency with Algorithms, Performance
  // with Data Structures" at https://www.youtube.com/watch?v=fHNmRkzxHWs
  using Map = std::map<std::string, JsonThing>;

  // The JSON type of this thing.
  enum class Kind {
    Null,
    Bool,
    Int64,
    Float64,
    String,
    Array,
    Object,
  };

  // One field per non-null Kind. This is a plain struct, not a union, so the
  // fields that do not match kind simply keep their default values.
  struct Value {
    bool b = false;  // Kind::Bool
    int64_t i = 0;   // Kind::Int64
    double f = 0;    // Kind::Float64
    std::string s;   // Kind::String
    Vector a;        // Kind::Array
    Map o;           // Kind::Object
  };

  Kind kind = Kind::Null;
  Value value;
};
313
Nigel Taod0b16cb2020-03-14 10:15:54 +1100314// ----
315
316std::string //
317escape(std::string s) {
318 for (char& c : s) {
319 if ((c == '~') || (c == '/') || (c == '\n') || (c == '\r')) {
320 goto escape_needed;
321 }
322 }
323 return s;
324
325escape_needed:
326 std::string e;
327 e.reserve(8 + s.length());
328 for (char& c : s) {
329 switch (c) {
330 case '~':
331 e += "~0";
332 break;
333 case '/':
334 e += "~1";
335 break;
336 case '\n':
Nigel Taod60815c2020-03-26 14:32:35 +1100337 if (g_flags.strict_json_pointer_syntax) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100338 return "";
339 }
340 e += "~n";
341 break;
342 case '\r':
Nigel Taod60815c2020-03-26 14:32:35 +1100343 if (g_flags.strict_json_pointer_syntax) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100344 return "";
345 }
346 e += "~r";
347 break;
348 default:
349 e += c;
350 break;
351 }
352 }
353 return e;
354}
355
356std::string //
Nigel Tao8098d962020-08-29 10:41:05 +1000357print_json_pointers(JsonThing& jt, uint32_t depth) {
358 std::cout << g_dst << '\n';
Nigel Taod60815c2020-03-26 14:32:35 +1100359 if (depth++ >= g_flags.max_output_depth) {
Nigel Taod0b16cb2020-03-14 10:15:54 +1100360 return "";
361 }
362
Nigel Tao8098d962020-08-29 10:41:05 +1000363 size_t n = g_dst.size();
Nigel Taod0b16cb2020-03-14 10:15:54 +1100364 switch (jt.kind) {
365 case JsonThing::Kind::Array:
Nigel Tao8098d962020-08-29 10:41:05 +1000366 g_dst += "/";
Nigel Taod0b16cb2020-03-14 10:15:54 +1100367 for (size_t i = 0; i < jt.value.a.size(); i++) {
Nigel Taob6c01b32020-08-29 10:46:04 +1000368 if (i >= g_to_string_cache.size()) {
369 g_to_string_cache.push_back(std::to_string(i));
370 }
371 g_dst += g_to_string_cache[i];
Nigel Tao8098d962020-08-29 10:41:05 +1000372 TRY(print_json_pointers(jt.value.a[i], depth));
373 g_dst.resize(n + 1);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100374 }
Nigel Tao8098d962020-08-29 10:41:05 +1000375 g_dst.resize(n);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100376 break;
377 case JsonThing::Kind::Object:
Nigel Tao8098d962020-08-29 10:41:05 +1000378 g_dst += "/";
Nigel Taod0b16cb2020-03-14 10:15:54 +1100379 for (auto& kv : jt.value.o) {
380 std::string e = escape(kv.first);
381 if (e.empty() && !kv.first.empty()) {
382 return "main: unsupported \"\\u000A\" or \"\\u000D\" in object key";
383 }
Nigel Tao8098d962020-08-29 10:41:05 +1000384 g_dst += e;
385 TRY(print_json_pointers(kv.second, depth));
386 g_dst.resize(n + 1);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100387 }
Nigel Tao8098d962020-08-29 10:41:05 +1000388 g_dst.resize(n);
Nigel Taod0b16cb2020-03-14 10:15:54 +1100389 break;
Nigel Tao18ef5b42020-03-16 10:37:47 +1100390 default:
391 break;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100392 }
393 return "";
394}
395
Nigel Taocf6c5782020-08-03 23:43:45 +1000396// ----
397
398class Callbacks : public wuffs_aux::DecodeJsonCallbacks {
399 public:
400 struct Entry {
401 Entry(JsonThing&& jt)
402 : thing(std::move(jt)), has_map_key(false), map_key() {}
403
404 JsonThing thing;
405 bool has_map_key;
406 std::string map_key;
407 };
408
409 Callbacks() = default;
410
411 std::string Append(JsonThing&& jt) {
412 if (m_stack.empty()) {
413 m_stack.push_back(Entry(std::move(jt)));
414 return "";
415 }
416 Entry& top = m_stack.back();
417 switch (top.thing.kind) {
418 case JsonThing::Kind::Array:
419 top.thing.value.a.push_back(std::move(jt));
420 return "";
421 case JsonThing::Kind::Object:
422 if (top.has_map_key) {
423 top.has_map_key = false;
424 auto iter = top.thing.value.o.find(top.map_key);
425 if (iter != top.thing.value.o.end()) {
426 return "main: duplicate key: " + top.map_key;
427 }
428 top.thing.value.o.insert(
429 iter, JsonThing::Map::value_type(std::move(top.map_key),
430 std::move(jt)));
431 return "";
432 } else if (jt.kind == JsonThing::Kind::String) {
433 top.has_map_key = true;
434 top.map_key = std::move(jt.value.s);
435 return "";
436 }
437 return "main: internal error: non-string map key";
Nigel Tao7fd3bb62020-08-24 21:34:53 +1000438 default:
439 return "main: internal error: non-container stack entry";
Nigel Taocf6c5782020-08-03 23:43:45 +1000440 }
Nigel Taocf6c5782020-08-03 23:43:45 +1000441 }
442
Nigel Taoca5da1f2020-08-10 15:26:29 +1000443 std::string AppendNull() override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000444 JsonThing jt;
445 jt.kind = JsonThing::Kind::Null;
446 return Append(std::move(jt));
447 }
448
Nigel Taoca5da1f2020-08-10 15:26:29 +1000449 std::string AppendBool(bool val) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000450 JsonThing jt;
451 jt.kind = JsonThing::Kind::Bool;
452 jt.value.b = val;
453 return Append(std::move(jt));
454 }
455
Nigel Taoca5da1f2020-08-10 15:26:29 +1000456 std::string AppendI64(int64_t val) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000457 JsonThing jt;
458 jt.kind = JsonThing::Kind::Int64;
459 jt.value.i = val;
460 return Append(std::move(jt));
461 }
462
Nigel Taoca5da1f2020-08-10 15:26:29 +1000463 std::string AppendF64(double val) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000464 JsonThing jt;
465 jt.kind = JsonThing::Kind::Float64;
466 jt.value.f = val;
467 return Append(std::move(jt));
468 }
469
Nigel Taoca5da1f2020-08-10 15:26:29 +1000470 std::string AppendTextString(std::string&& val) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000471 JsonThing jt;
472 jt.kind = JsonThing::Kind::String;
473 jt.value.s = std::move(val);
474 return Append(std::move(jt));
475 }
476
Nigel Taoca5da1f2020-08-10 15:26:29 +1000477 std::string Push(uint32_t flags) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000478 if (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST) {
479 JsonThing jt;
480 jt.kind = JsonThing::Kind::Array;
481 m_stack.push_back(std::move(jt));
482 return "";
483 } else if (flags & WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT) {
484 JsonThing jt;
485 jt.kind = JsonThing::Kind::Object;
486 m_stack.push_back(std::move(jt));
487 return "";
488 }
489 return "main: internal error: bad push";
490 }
491
Nigel Taoca5da1f2020-08-10 15:26:29 +1000492 std::string Pop(uint32_t flags) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000493 if (m_stack.empty()) {
494 return "main: internal error: bad pop";
495 }
496 JsonThing jt = std::move(m_stack.back().thing);
497 m_stack.pop_back();
498 return Append(std::move(jt));
499 }
500
Nigel Taoca5da1f2020-08-10 15:26:29 +1000501 void Done(wuffs_aux::DecodeJsonResult& result,
502 wuffs_aux::sync_io::Input& input,
503 wuffs_aux::IOBuffer& buffer) override {
Nigel Taocf6c5782020-08-03 23:43:45 +1000504 if (!result.error_message.empty()) {
505 return;
506 } else if (m_stack.size() != 1) {
507 result.error_message = "main: internal error: bad depth";
508 return;
509 }
Nigel Tao8098d962020-08-29 10:41:05 +1000510 result.error_message = print_json_pointers(m_stack.back().thing, 0);
Nigel Taocf6c5782020-08-03 23:43:45 +1000511 }
512
513 private:
514 std::vector<Entry> m_stack;
515};
516
517// ----
518
Nigel Taod0b16cb2020-03-14 10:15:54 +1100519std::string //
520main1(int argc, char** argv) {
521 TRY(parse_flags(argc, argv));
Nigel Taob3438432020-08-13 00:06:56 +1000522 if (!g_flags.strict_json_pointer_syntax) {
523 g_quirks.push_back(WUFFS_JSON__QUIRK_JSON_POINTER_ALLOW_TILDE_R_TILDE_N);
524 }
Nigel Taod0b16cb2020-03-14 10:15:54 +1100525
Nigel Taocf6c5782020-08-03 23:43:45 +1000526 FILE* in = stdin;
Nigel Taod60815c2020-03-26 14:32:35 +1100527 if (g_flags.remaining_argc > 1) {
528 return g_usage;
529 } else if (g_flags.remaining_argc == 1) {
Nigel Taocf6c5782020-08-03 23:43:45 +1000530 in = fopen(g_flags.remaining_argv[0], "r");
531 if (!in) {
532 return std::string("main: cannot read input file");
Nigel Taod0b16cb2020-03-14 10:15:54 +1100533 }
534 }
535
Nigel Tao2742a4f2020-08-17 00:02:49 +1000536 Callbacks callbacks;
537 wuffs_aux::sync_io::FileInput input(in);
Nigel Taocf6c5782020-08-03 23:43:45 +1000538 return wuffs_aux::DecodeJson(
Nigel Tao2742a4f2020-08-17 00:02:49 +1000539 callbacks, input,
Nigel Tao8aac6762020-08-12 22:47:45 +1000540 wuffs_base__make_slice_u32(g_quirks.data(), g_quirks.size()),
541 (g_flags.query_c_string ? g_flags.query_c_string : ""))
Nigel Taocf6c5782020-08-03 23:43:45 +1000542 .error_message;
Nigel Taod0b16cb2020-03-14 10:15:54 +1100543}
544
545// ----
546
// compute_exit_code maps main1's status message to a process exit code. An
// empty message means success (exit code 0). Any other message is first
// written to stderr, followed by a new line.
int  //
compute_exit_code(std::string status_msg) {
  if (status_msg.empty()) {
    return 0;
  }
  std::cerr << status_msg << '\n';

  // Return an exit code of 1 for regular (foreseen) errors, e.g. badly
  // formatted or unsupported input.
  //
  // Return an exit code of 2 for internal (exceptional) errors, e.g. defensive
  // run-time checks found that an internal invariant did not hold.
  //
  // Automated testing, including badly formatted inputs, can therefore
  // discriminate between expected failure (exit code 1) and unexpected failure
  // (other non-zero exit codes). Specifically, exit code 2 for internal
  // invariant violation, exit code 139 (which is 128 + SIGSEGV on x86_64
  // linux) for a segmentation fault (e.g. null pointer dereference).
  const bool is_internal_error =
      status_msg.find("internal error:") != std::string::npos;
  return is_internal_error ? 2 : 1;
}
566
567int //
568main(int argc, char** argv) {
569 std::string z = main1(argc, argv);
570 int exit_code = compute_exit_code(z);
571 return exit_code;
572}