Nigel Tao | 0eee231 | 2020-07-03 12:48:37 +1000 | [diff] [blame] | 1 | // Copyright 2020 The Wuffs Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | // ---------------- |
| 16 | |
| 17 | // process-json-numbers.c processes all the numbers in the JSON-formatted data |
| 18 | // read from stdin. It succeeds (with exit code 0) if the input is valid JSON |
| 19 | // and all of the numbers within were processed without error. |
| 20 | // |
| 21 | // Without further flags, processing is a no-op and the program only verifies |
| 22 | // the JSON structure. |
| 23 | // |
| 24 | // Pass -e (--emit-number-str) to emit each number (as a string) on its own |
| 25 | // line. |
| 26 | // |
| 27 | // Pass -p (--parse-number-f64) to call wuffs_base__parse_number_f64 on each |
| 28 | // number. Timing this program with and without this flag gives a rough measure |
| 29 | // of how much time is spent solely in wuffs_base__parse_number_f64. |
| 30 | // |
| 31 | // Pass -r (--render-number-f64) to call wuffs_base__render_number_f64 (with |
| 32 | // WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION) on each number. Timing |
| 33 | // this program with and without this flag gives a rough measure of how much |
| 34 | // time is spent solely in wuffs_base__render_number_f64. |
| 35 | // |
| 36 | // The -r flag is ignored unless -p is also passed. |
| 37 | // |
| 38 | // This program's purpose is to benchmark the wuffs_base__etc_f64 functions. |
| 39 | // It's not about JSON per se, but JSON files are a source of realistic |
| 40 | // floating point numbers. |
| 41 | |
| 42 | #include <inttypes.h> |
| 43 | #include <stdio.h> |
| 44 | #include <string.h> |
| 45 | #include <unistd.h> |
| 46 | |
| 47 | // Wuffs ships as a "single file C library" or "header file library" as per |
| 48 | // https://github.com/nothings/stb/blob/master/docs/stb_howto.txt |
| 49 | // |
| 50 | // To use that single file as a "foo.c"-like implementation, instead of a |
| 51 | // "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or |
| 52 | // compiling it. |
| 53 | #define WUFFS_IMPLEMENTATION |
| 54 | |
// Defining the WUFFS_CONFIG__MODULE* macros is optional, but doing so lets
// users of release/c/etc.c choose which parts of Wuffs to build. That file
// contains the entire Wuffs standard library, implementing a variety of codecs
// and file formats. Without these macro definitions, an optimizing compiler or
// linker may very well discard Wuffs code for unused codecs, but listing the
// Wuffs modules we use makes that process explicit. Preprocessing means that
// such code simply isn't compiled.
| 62 | #define WUFFS_CONFIG__MODULES |
| 63 | #define WUFFS_CONFIG__MODULE__BASE |
| 64 | #define WUFFS_CONFIG__MODULE__JSON |
| 65 | |
| 66 | // If building this program in an environment that doesn't easily accommodate |
| 67 | // relative includes, you can use the script/inline-c-relative-includes.go |
Nigel Tao | a20a2bb | 2020-09-07 21:06:58 +1000 | [diff] [blame] | 68 | // program to generate a stand-alone C file. |
Nigel Tao | 0eee231 | 2020-07-03 12:48:37 +1000 | [diff] [blame] | 69 | #include "../release/c/wuffs-unsupported-snapshot.c" |
| 70 | |
Nigel Tao | 27766ae | 2020-07-09 10:59:54 +1000 | [diff] [blame] | 71 | // Uncomment this to use the github.com/lemire/fast_double_parser library. This |
| 72 | // header-only library is C++, not C. |
Nigel Tao | 510c230 | 2020-09-07 22:59:45 +1000 | [diff] [blame] | 73 | // #define USE_LEMIRE_FAST_DOUBLE_PARSER |
Nigel Tao | 27766ae | 2020-07-09 10:59:54 +1000 | [diff] [blame] | 74 | |
Nigel Tao | 510c230 | 2020-09-07 22:59:45 +1000 | [diff] [blame] | 75 | #ifdef USE_LEMIRE_FAST_DOUBLE_PARSER |
Nigel Tao | 27766ae | 2020-07-09 10:59:54 +1000 | [diff] [blame] | 76 | #include "/the/path/to/fast_double_parser/include/fast_double_parser.h" |
| 77 | #endif |
| 78 | |
Nigel Tao | 0eee231 | 2020-07-03 12:48:37 +1000 | [diff] [blame] | 79 | // Wuffs allows either statically or dynamically allocated work buffers. This |
| 80 | // program exercises static allocation. |
| 81 | #define WORK_BUFFER_ARRAY_SIZE \ |
| 82 | WUFFS_JSON__DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE |
| 83 | #if WORK_BUFFER_ARRAY_SIZE > 0 |
| 84 | uint8_t g_work_buffer_array[WORK_BUFFER_ARRAY_SIZE]; |
| 85 | #else |
| 86 | // Not all C/C++ compilers support 0-length arrays. |
| 87 | uint8_t g_work_buffer_array[1]; |
| 88 | #endif |
| 89 | |
| 90 | #ifndef SRC_BUFFER_ARRAY_SIZE |
| 91 | #define SRC_BUFFER_ARRAY_SIZE (64 * 1024 * 1024) |
| 92 | #endif |
| 93 | #ifndef TOKEN_BUFFER_ARRAY_SIZE |
| 94 | #define TOKEN_BUFFER_ARRAY_SIZE (128 * 1024) |
| 95 | #endif |
| 96 | |
| 97 | uint8_t g_src_buffer_array[SRC_BUFFER_ARRAY_SIZE]; |
| 98 | wuffs_base__token g_tok_buffer_array[TOKEN_BUFFER_ARRAY_SIZE]; |
| 99 | |
| 100 | wuffs_base__io_buffer g_src; |
| 101 | wuffs_base__token_buffer g_tok; |
| 102 | |
| 103 | wuffs_json__decoder g_dec; |
| 104 | |
// TRY evaluates error_msg (an expression yielding a "const char*") exactly
// once. If the result is non-NULL, TRY makes the enclosing function return
// that message. Otherwise, execution continues with the next statement.
//
// It may only be used inside functions that return "const char*".
#define TRY(error_msg)                   \
  do {                                   \
    const char* try_msg_ = (error_msg);  \
    if (try_msg_ != NULL) {              \
      return try_msg_;                   \
    }                                    \
  } while (0)
| 112 | |
// ignore_return_value deliberately discards its argument. Wrapping a call in
// it suppresses the errors that -Wall -Werror would otherwise raise for an
// unchecked return value.
static void  //
ignore_return_value(int ignored) {
  (void)ignored;
}
| 116 | |
| 117 | const char* // |
| 118 | read_src() { |
| 119 | if (g_src.meta.closed) { |
| 120 | return "main: internal error: read requested on a closed source"; |
| 121 | } |
| 122 | wuffs_base__io_buffer__compact(&g_src); |
| 123 | if (g_src.meta.wi >= g_src.data.len) { |
| 124 | return "main: g_src buffer is full"; |
| 125 | } |
| 126 | size_t n = fread(g_src.data.ptr + g_src.meta.wi, sizeof(uint8_t), |
| 127 | g_src.data.len - g_src.meta.wi, stdin); |
| 128 | g_src.meta.wi += n; |
| 129 | g_src.meta.closed = feof(stdin); |
| 130 | if ((n == 0) && !g_src.meta.closed) { |
| 131 | return "main: read error"; |
| 132 | } |
| 133 | return NULL; |
| 134 | } |
| 135 | |
| 136 | // ---- |
| 137 | |
// g_flags holds the result of parse_flags: the non-flag arguments that remain
// after flag parsing, plus one boolean per recognized flag.
struct {
  int remaining_argc;
  char** remaining_argv;

  bool emit_number_str;    // Set by -e or -emit-number-str.
  bool parse_number_f64;   // Set by -p or -parse-number-f64.
  bool render_number_f64;  // Set by -r or -render-number-f64.
} g_flags = {0};

// parse_flags scans the leading flag arguments of argv (skipping argv[0], the
// program name) and records them in g_flags. Flag parsing stops at the first
// argument that is not a flag. The arguments from that point onwards are
// recorded as g_flags.remaining_argc and g_flags.remaining_argv.
//
// It returns NULL on success or an error message if a flag is unrecognized.
// In the error case, the remaining_etc fields are not updated.
const char*  //
parse_flags(int argc, char** argv) {
  int i = (argc > 0) ? 1 : 0;  // Skip argv[0], the program name.
  while (i < argc) {
    const char* name = argv[i];
    if (name[0] != '-') {
      break;
    }
    name++;

    // A double-dash "--foo" is equivalent to a single-dash "-foo". As special
    // cases, a bare "-" is not a flag (some programs may interpret it as
    // stdin) and a bare "--" means to stop parsing flags.
    if (name[0] == '\x00') {
      break;
    }
    if (name[0] == '-') {
      name++;
      if (name[0] == '\x00') {
        // The bare "--" itself is consumed. It is not a remaining argument.
        i++;
        break;
      }
    }

    if ((strcmp(name, "e") == 0) || (strcmp(name, "emit-number-str") == 0)) {
      g_flags.emit_number_str = true;
    } else if ((strcmp(name, "p") == 0) ||
               (strcmp(name, "parse-number-f64") == 0)) {
      g_flags.parse_number_f64 = true;
    } else if ((strcmp(name, "r") == 0) ||
               (strcmp(name, "render-number-f64") == 0)) {
      g_flags.render_number_f64 = true;
    } else {
      return "main: unrecognized flag argument";
    }
    i++;
  }

  g_flags.remaining_argc = argc - i;
  g_flags.remaining_argv = argv + i;
  return NULL;
}
| 189 | |
| 190 | const char* // |
| 191 | main1(int argc, char** argv) { |
| 192 | TRY(parse_flags(argc, argv)); |
| 193 | if (g_flags.remaining_argc > 0) { |
| 194 | return "main: bad argument: use \"program < input\", not \"program input\""; |
| 195 | } |
| 196 | |
| 197 | uint8_t new_line[1]; |
| 198 | new_line[0] = '\n'; |
| 199 | |
| 200 | g_src = wuffs_base__make_io_buffer( |
| 201 | wuffs_base__make_slice_u8(g_src_buffer_array, SRC_BUFFER_ARRAY_SIZE), |
| 202 | wuffs_base__empty_io_buffer_meta()); |
| 203 | |
| 204 | g_tok = wuffs_base__make_token_buffer( |
| 205 | wuffs_base__make_slice_token(g_tok_buffer_array, TOKEN_BUFFER_ARRAY_SIZE), |
| 206 | wuffs_base__empty_token_buffer_meta()); |
| 207 | |
| 208 | wuffs_base__status init_status = wuffs_json__decoder__initialize( |
| 209 | &g_dec, sizeof__wuffs_json__decoder(), WUFFS_VERSION, 0); |
| 210 | if (!wuffs_base__status__is_ok(&init_status)) { |
| 211 | return wuffs_base__status__message(&init_status); |
| 212 | } |
| 213 | |
| 214 | uint64_t pos = 0; |
| 215 | while (true) { |
| 216 | wuffs_base__status status = wuffs_json__decoder__decode_tokens( |
| 217 | &g_dec, &g_tok, &g_src, |
| 218 | wuffs_base__make_slice_u8(g_work_buffer_array, WORK_BUFFER_ARRAY_SIZE)); |
| 219 | |
| 220 | while (g_tok.meta.ri < g_tok.meta.wi) { |
| 221 | wuffs_base__token* t = &g_tok.data.ptr[g_tok.meta.ri++]; |
| 222 | uint64_t len = wuffs_base__token__length(t); |
| 223 | |
| 224 | if (wuffs_base__token__value_base_category(t) == |
| 225 | WUFFS_BASE__TOKEN__VBC__NUMBER) { |
| 226 | uint64_t buf_pos = pos - g_src.meta.pos; |
| 227 | uint64_t buf_len = g_src.data.len; |
| 228 | if ((buf_len < buf_pos) || ((buf_len - buf_pos) < len)) { |
| 229 | return "main: internal error: inconsistent token position/length"; |
| 230 | } |
| 231 | |
| 232 | if (g_flags.emit_number_str) { |
| 233 | const int stdout_fd = 1; |
| 234 | ignore_return_value(write(stdout_fd, &g_src.data.ptr[buf_pos], len)); |
| 235 | ignore_return_value(write(stdout_fd, &new_line[0], 1)); |
| 236 | } |
| 237 | |
| 238 | if (g_flags.parse_number_f64) { |
Nigel Tao | 27766ae | 2020-07-09 10:59:54 +1000 | [diff] [blame] | 239 | wuffs_base__result_f64 r; |
| 240 | |
Nigel Tao | 510c230 | 2020-09-07 22:59:45 +1000 | [diff] [blame] | 241 | #ifdef USE_LEMIRE_FAST_DOUBLE_PARSER |
Nigel Tao | 27766ae | 2020-07-09 10:59:54 +1000 | [diff] [blame] | 242 | // Wuffs (and its JSON parser) works with slices (pointer-length |
| 243 | // pairs) but fast_double_parser works with NUL-terminated strings. |
| 244 | char buf[1024]; |
| 245 | if (len > 1023) { |
| 246 | return "main: number-as-string is too long"; |
| 247 | } |
| 248 | memcpy(&buf[0], &g_src.data.ptr[buf_pos], len); |
| 249 | buf[len] = 0; |
| 250 | if (!fast_double_parser::decimal_separator_dot::parse_number( |
| 251 | &buf[0], &r.value)) { |
| 252 | return "main: could not parse number"; |
| 253 | } |
| 254 | r.status = wuffs_base__make_status(NULL); |
| 255 | #else |
| 256 | r = wuffs_base__parse_number_f64( |
Nigel Tao | 6b7ce30 | 2020-07-07 16:19:46 +1000 | [diff] [blame] | 257 | wuffs_base__make_slice_u8(&g_src.data.ptr[buf_pos], len), |
| 258 | WUFFS_BASE__PARSE_NUMBER_XXX__DEFAULT_OPTIONS); |
Nigel Tao | 0eee231 | 2020-07-03 12:48:37 +1000 | [diff] [blame] | 259 | if (!wuffs_base__status__is_ok(&r.status)) { |
| 260 | return wuffs_base__status__message(&r.status); |
| 261 | } |
Nigel Tao | 27766ae | 2020-07-09 10:59:54 +1000 | [diff] [blame] | 262 | #endif |
Nigel Tao | 0eee231 | 2020-07-03 12:48:37 +1000 | [diff] [blame] | 263 | |
| 264 | if (g_flags.render_number_f64) { |
| 265 | uint8_t render_buffer[2048]; |
| 266 | size_t n = wuffs_base__render_number_f64( |
| 267 | wuffs_base__make_slice_u8(&render_buffer[0], 2048), r.value, 0, |
| 268 | WUFFS_BASE__RENDER_NUMBER_FXX__JUST_ENOUGH_PRECISION); |
| 269 | if (n == 0) { |
| 270 | return "main: internal error: couldn't render_number_f64"; |
| 271 | } |
| 272 | } |
| 273 | } |
| 274 | } |
| 275 | |
| 276 | pos += len; |
| 277 | if (0 > ((int64_t)pos)) { |
| 278 | return "main: input is too long"; |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | if (status.repr == NULL) { |
| 283 | return NULL; |
| 284 | } else if (status.repr == wuffs_base__suspension__short_read) { |
| 285 | TRY(read_src()); |
| 286 | } else if (status.repr == wuffs_base__suspension__short_write) { |
| 287 | wuffs_base__token_buffer__compact(&g_tok); |
| 288 | } else { |
| 289 | return wuffs_base__status__message(&status); |
| 290 | } |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | // ---- |
| 295 | |
// compute_exit_code converts main1's result into a process exit code. A NULL
// status_msg means success: nothing is printed and the exit code is 0.
// Otherwise, the message (followed by a new line) is printed to stderr and the
// exit code is non-zero, as described below.
//
// A message that is 2047 or more bytes long is itself considered an internal
// error and is replaced by a fixed message saying so.
int  //
compute_exit_code(const char* status_msg) {
  if (!status_msg) {
    return 0;
  }
  // Only whether a NUL terminator occurs within the first 2047 bytes matters,
  // not the exact length. memchr stops at the first match, so it does not read
  // past the terminator of a shorter string.
  if (!memchr(status_msg, 0, 2047)) {
    status_msg = "main: internal error: error message is too long";
  }
  fprintf(stderr, "%s\n", status_msg);
  // Return an exit code of 1 for regular (foreseen) errors, e.g. badly
  // formatted or unsupported input.
  //
  // Return an exit code of 2 for internal (exceptional) errors, e.g. defensive
  // run-time checks found that an internal invariant did not hold.
  //
  // Automated testing, including badly formatted inputs, can therefore
  // discriminate between expected failure (exit code 1) and unexpected failure
  // (other non-zero exit codes). Specifically, exit code 2 for internal
  // invariant violation, exit code 139 (which is 128 + SIGSEGV on x86_64
  // linux) for a segmentation fault (e.g. null pointer dereference).
  return strstr(status_msg, "internal error:") ? 2 : 1;
}
| 320 | |
// main runs main1 and converts its result (NULL or an error message) into the
// process exit code, via compute_exit_code.
int  //
main(int argc, char** argv) {
  return compute_exit_code(main1(argc, argv));
}