// Copyright 2020 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Statuses declared by this package. The "pub" statuses are part of the
// package's public API. The "pri" status is private to this package.
pub status "#bad C0 control code"
pub status "#bad UTF-8"
pub status "#bad backslash-escape"
pub status "#bad input"
pub status "#bad new-line in a string"
pub status "#bad quirk combination"
pub status "#unsupported number length"
pub status "#unsupported recursion depth"

pri status "#internal error: inconsistent I/O"

// --------

// DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE is the largest workbuf length that a
// decoder will request.
//
// It is zero, so a decoder never requests a non-empty work buffer.
pub const DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE : base.u64 = 0

// DECODER_DEPTH_MAX_INCL is the maximum supported recursion depth: how deeply
// nested [] arrays and {} objects can be.
//
// The JSON spec itself does not define a limit, but allows implementations to
// set their own limits.
pub const DECODER_DEPTH_MAX_INCL : base.u64 = 1024

// DECODER_DST_TOKEN_BUFFER_LENGTH_MIN_INCL is the minimum length of the dst
// wuffs_base__token_buffer passed to the decoder.
pub const DECODER_DST_TOKEN_BUFFER_LENGTH_MIN_INCL : base.u64 = 1

// DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL is the minimum length of the src
// wuffs_base__io_buffer passed to the decoder.
//
// This constrains the src.data.len field and it is the io_buffer capacity (the
// maximum possible src.meta.ri and src.meta.wi values). It is a property of
// the backing array's length, not the length of the JSON-formatted input per
// se. It is perfectly valid to decode "[1,2]" (of length 5) as JSON, as long
// as that content is placed in an io_buffer whose data.len is at least 100.
pub const DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL : base.u64 = 100

// DECODER_NUMBER_LENGTH_MAX_INCL is the longest supported byte length for a
// JSON number. Unlike JSON strings, this package's tokenizer never splits a
// single JSON number into multiple tokens, as this simplifies the callers.
// They can then call e.g. wuffs_base__parse_number_i64 without having to
// reconstitute a contiguous string representation.
//
// The JSON spec itself does not define a limit, but allows implementations to
// set their own limits.
//
// NOTE(review): 99 is one less than DECODER_SRC_IO_BUFFER_LENGTH_MIN_INCL
// (100), presumably so that a maximal-length number plus one following byte
// fits in a minimal-length src buffer. Confirm against the decoder before
// changing either constant.
pri const DECODER_NUMBER_LENGTH_MAX_INCL : base.u64 = 99

// --------

// Look-Up Tables (LUTs).

// LUT_BACKSLASHES[i] helps decode "\i", for various 'i's.
//
// If the element's 0x80 bit is set then "\i" is unconditionally a valid
// single-output-byte backslash-escape. The low 7 bits are the unescaped value.
// For example, LUT_BACKSLASHES['n'] is (0x80 | 0x0A), because "\n" is U+000A.
//
// If the element is non-zero (but the 0x80 bit is not set) then "\i"'s
// validity depends on the relevant quirk. The element's value is an enum:
//  - 1: "\a", U+0007, QUIRK_ALLOW_BACKSLASH_A.
//  - 2: "\e", U+001B, QUIRK_ALLOW_BACKSLASH_E.
//  - 3: "backslash new_line(not_n)", U+000A, QUIRK_ALLOW_BACKSLASH_NEW_LINE.
//  - 4: "\?", U+003F, QUIRK_ALLOW_BACKSLASH_QUESTION_MARK.
//  - 5: "\'", U+0027, QUIRK_ALLOW_BACKSLASH_SINGLE_QUOTE.
//  - 6: "\v", U+000B, QUIRK_ALLOW_BACKSLASH_V.
//  - 7: "\0", U+0000, QUIRK_ALLOW_BACKSLASH_ZERO.
// The quirk and U+1234 values are held in LUT_QUIRKY_BACKSLASHES_QUIRKS and
// LUT_QUIRKY_BACKSLASHES_CHARS, below.
//
// If the element is zero then "\i" is invalid, or it is a special case, the
// start of "\x12", "\u1234" or "\U12345678".
pri const LUT_BACKSLASHES : array[256] base.u8 = [
    // 0     1     2     3     4     5     6     7
    // 8     9     A     B     C     D     E     F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x00 ..= 0x07.
    0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x08 ..= 0x0F. '\n'.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x10 ..= 0x17.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x18 ..= 0x1F.
    0x00, 0x00, 0xA2, 0x00, 0x00, 0x00, 0x00, 0x05,  // 0x20 ..= 0x27. '"', '\''.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAF,  // 0x28 ..= 0x2F. '/'.
    0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x30 ..= 0x37. '0'.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,  // 0x38 ..= 0x3F. '?'.

    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x40 ..= 0x47.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x48 ..= 0x4F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x50 ..= 0x57.
    0x00, 0x00, 0x00, 0x00, 0xDC, 0x00, 0x00, 0x00,  // 0x58 ..= 0x5F. '\\'.
    0x00, 0x01, 0x88, 0x00, 0x00, 0x02, 0x8C, 0x00,  // 0x60 ..= 0x67. 'a', 'b', 'e', 'f'.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8A, 0x00,  // 0x68 ..= 0x6F. 'n'.
    0x00, 0x00, 0x8D, 0x00, 0x89, 0x00, 0x06, 0x00,  // 0x70 ..= 0x77. 'r', 't', 'v'.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x78 ..= 0x7F.

    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x80 ..= 0x87.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x88 ..= 0x8F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x90 ..= 0x97.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x98 ..= 0x9F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xA0 ..= 0xA7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xA8 ..= 0xAF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xB0 ..= 0xB7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xB8 ..= 0xBF.

    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xC0 ..= 0xC7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xC8 ..= 0xCF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xD0 ..= 0xD7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xD8 ..= 0xDF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xE0 ..= 0xE7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xE8 ..= 0xEF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xF0 ..= 0xF7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xF8 ..= 0xFF.
    // 0     1     2     3     4     5     6     7
    // 8     9     A     B     C     D     E     F
]

// LUT_QUIRKY_BACKSLASHES_QUIRKS is discussed in the LUT_BACKSLASHES comment.
// The first element (index 0) is not used, but 8 is a round power of 2, so
// enforcing index-in-bounds is a simple "&7" operation.
//
// Each element is a quirk's offset from QUIRKS_BASE, as shown by the
// expression in that element's trailing comment.
pri const LUT_QUIRKY_BACKSLASHES_QUIRKS : array[8] base.u8[..= 0x0A] = [
    0,
    // Replacing the literals below with the expressions in their trailing
    // comments requires being able to define consts in terms of other consts,
    // which could be non-trivial to specify and implement (to avoid infinite
    // loops).
    0x01,  // (QUIRK_ALLOW_BACKSLASH_A - QUIRKS_BASE) as base.u8,
    0x03,  // (QUIRK_ALLOW_BACKSLASH_E - QUIRKS_BASE) as base.u8,
    0x04,  // (QUIRK_ALLOW_BACKSLASH_NEW_LINE - QUIRKS_BASE) as base.u8,
    0x05,  // (QUIRK_ALLOW_BACKSLASH_QUESTION_MARK - QUIRKS_BASE) as base.u8,
    0x06,  // (QUIRK_ALLOW_BACKSLASH_SINGLE_QUOTE - QUIRKS_BASE) as base.u8,
    0x07,  // (QUIRK_ALLOW_BACKSLASH_V - QUIRKS_BASE) as base.u8,
    0x0A,  // (QUIRK_ALLOW_BACKSLASH_ZERO - QUIRKS_BASE) as base.u8,
]

// LUT_QUIRKY_BACKSLASHES_CHARS is discussed in the LUT_BACKSLASHES comment.
// The first element (index 0) is not used, but 8 is a round power of 2, so
// enforcing index-in-bounds is a simple "&7" operation.
//
// Element N is the unescaped byte for enum value N of LUT_BACKSLASHES: U+0007,
// U+001B, U+000A, U+003F, U+0027, U+000B and U+0000 for N in 1 ..= 7.
pri const LUT_QUIRKY_BACKSLASHES_CHARS : array[8] base.u8 = [
    0x00, 0x07, 0x1B, 0x0A, 0x3F, 0x27, 0x0B, 0x00,
]

// LUT_CHARS helps decode bytes within a string:
//  - 0x00 is 1-byte UTF-8 (ASCII) but not '"', '\\' or a C0 control code.
//  - 0x01 is '"'.
//  - 0x02 is '\\'.
//  - 0x03 is the start of 2-byte UTF-8.
//  - 0x04 is the start of 3-byte UTF-8.
//  - 0x05 is the start of 4-byte UTF-8.
//  - 0x10 is a UTF-8 tail byte.
//  - 0x20 is invalid UTF-8.
//  - 0x80 and above is invalid JSON (C0 control codes). In the table below,
//    these elements are (0x80 | i) for the control code i.
//
// RFC 3629 (UTF-8) gives this grammar for valid UTF-8:
//    UTF8-1      = %x00-7F
//    UTF8-2      = %xC2-DF UTF8-tail
//    UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
//                  %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
//    UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
//                  %xF4 %x80-8F 2( UTF8-tail )
//    UTF8-tail   = %x80-BF
pri const LUT_CHARS : array[256] base.u8 = [
    // 0     1     2     3     4     5     6     7
    // 8     9     A     B     C     D     E     F
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,  // 0x00 ..= 0x07. C0 control codes.
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,  // 0x08 ..= 0x0F. C0 control codes.
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,  // 0x10 ..= 0x17. C0 control codes.
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,  // 0x18 ..= 0x1F. C0 control codes.
    0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x20 ..= 0x27. UTF-8-1; '"'.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x28 ..= 0x2F. UTF-8-1.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x30 ..= 0x37. UTF-8-1.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x38 ..= 0x3F. UTF-8-1.

    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x40 ..= 0x47. UTF-8-1.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x48 ..= 0x4F. UTF-8-1.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x50 ..= 0x57. UTF-8-1.
    0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,  // 0x58 ..= 0x5F. UTF-8-1; '\\'.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x60 ..= 0x67. UTF-8-1.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x68 ..= 0x6F. UTF-8-1.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x70 ..= 0x77. UTF-8-1.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x78 ..= 0x7F. UTF-8-1.

    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x80 ..= 0x87. UTF-8 tail.
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x88 ..= 0x8F. UTF-8 tail.
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x90 ..= 0x97. UTF-8 tail.
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x98 ..= 0x9F. UTF-8 tail.
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0xA0 ..= 0xA7. UTF-8 tail.
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0xA8 ..= 0xAF. UTF-8 tail.
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0xB0 ..= 0xB7. UTF-8 tail.
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0xB8 ..= 0xBF. UTF-8 tail.

    0x20, 0x20, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,  // 0xC0 ..= 0xC7. Invalid UTF-8; UTF-8-2.
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,  // 0xC8 ..= 0xCF. UTF-8-2.
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,  // 0xD0 ..= 0xD7. UTF-8-2.
    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,  // 0xD8 ..= 0xDF. UTF-8-2.
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,  // 0xE0 ..= 0xE7. UTF-8-3.
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,  // 0xE8 ..= 0xEF. UTF-8-3.
    0x05, 0x05, 0x05, 0x05, 0x05, 0x20, 0x20, 0x20,  // 0xF0 ..= 0xF7. UTF-8-4; Invalid UTF-8.
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  // 0xF8 ..= 0xFF. Invalid UTF-8.
    // 0     1     2     3     4     5     6     7
    // 8     9     A     B     C     D     E     F
]

// CLASS_ETC are the character classes that the LUT_CLASSES table assigns to
// input bytes. The LUT_CLASSES comment lists them alongside their bitmasks:
// class N corresponds to bitmask (1 << N).
pri const CLASS_WHITESPACE           : base.u8 = 0x00
pri const CLASS_STRING               : base.u8 = 0x01
pri const CLASS_COMMA                : base.u8 = 0x02
pri const CLASS_COLON                : base.u8 = 0x03
pri const CLASS_NUMBER               : base.u8 = 0x04
pri const CLASS_OPEN_CURLY_BRACE     : base.u8 = 0x05
pri const CLASS_CLOSE_CURLY_BRACE    : base.u8 = 0x06
pri const CLASS_OPEN_SQUARE_BRACKET  : base.u8 = 0x07
pri const CLASS_CLOSE_SQUARE_BRACKET : base.u8 = 0x08
pri const CLASS_FALSE                : base.u8 = 0x09
pri const CLASS_TRUE                 : base.u8 = 0x0A
pri const CLASS_NULL_NAN_INF         : base.u8 = 0x0B
pri const CLASS_COMMENT              : base.u8 = 0x0C

// EXPECT_ETC are unions of LUT_CLASSES bitmasks. See LUT_CLASSES below.
//
// Bitwise or'ing these together gives 0x1FFE. Whitespace is never expected, as
// it is handled separately.
//
// EXPECT_VALUE is also defined to be 0x1EB2, equivalent to (EXPECT_STRING |
// EXPECT_NON_STRING_VALUE).
//
// "Non-string value" includes literals (false, true, null), numbers, arrays
// and objects.
//
// "String value" includes "this" and "th\u0061t".
//
// Comments are always expected. Whether the relevant quirks are enabled is
// checked elsewhere.
pri const EXPECT_VALUE                : base.u32 = 0x1EB2
pri const EXPECT_NON_STRING_VALUE     : base.u32 = 0x1EB0
pri const EXPECT_STRING               : base.u32 = 0x1002
pri const EXPECT_COMMA                : base.u32 = 0x1004
pri const EXPECT_COLON                : base.u32 = 0x1008
pri const EXPECT_NUMBER               : base.u32 = 0x1010
pri const EXPECT_CLOSE_CURLY_BRACE    : base.u32 = 0x1040
pri const EXPECT_CLOSE_SQUARE_BRACKET : base.u32 = 0x1100

// LUT_CLASSES is:
//  - 0x00 (bitmask 0x0001) is CLASS_WHITESPACE.
//  - 0x01 (bitmask 0x0002) is CLASS_STRING.
//  - 0x02 (bitmask 0x0004) is CLASS_COMMA.
//  - 0x03 (bitmask 0x0008) is CLASS_COLON.
//  - 0x04 (bitmask 0x0010) is CLASS_NUMBER.
//  - 0x05 (bitmask 0x0020) is CLASS_OPEN_CURLY_BRACE.
//  - 0x06 (bitmask 0x0040) is CLASS_CLOSE_CURLY_BRACE.
//  - 0x07 (bitmask 0x0080) is CLASS_OPEN_SQUARE_BRACKET.
//  - 0x08 (bitmask 0x0100) is CLASS_CLOSE_SQUARE_BRACKET.
//  - 0x09 (bitmask 0x0200) is CLASS_FALSE.
//  - 0x0A (bitmask 0x0400) is CLASS_TRUE.
//  - 0x0B (bitmask 0x0800) is CLASS_NULL_NAN_INF.
//  - 0x0C (bitmask 0x1000) is CLASS_COMMENT.
//  - 0x0D (bitmask 0x2000) is reserved.
//  - 0x0E (bitmask 0x4000) is reserved.
//  - 0x0F (bitmask 0x8000) is CLASS_BAD_INPUT.
//
// The bitmasks are used by the "expect" variable: what the next character
// class can be. See EXPECT_ETC above.
pri const LUT_CLASSES : array[256] base.u8[..= 0x0F] = [
    // 0     1     2     3     4     5     6     7
    // 8     9     A     B     C     D     E     F
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0x00 ..= 0x07.
    0x0F, 0x00, 0x00, 0x0F, 0x0F, 0x00, 0x0F, 0x0F,  // 0x08 ..= 0x0F. '\t', '\n', '\r'.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0x10 ..= 0x17.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0x18 ..= 0x1F.
    0x00, 0x0F, 0x01, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0x20 ..= 0x27. ' ', '"'.
    0x0F, 0x0F, 0x0F, 0x0B, 0x02, 0x04, 0x0F, 0x0C,  // 0x28 ..= 0x2F. '+', ',', '-', '/'.
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,  // 0x30 ..= 0x37. '0'-'7'.
    0x04, 0x04, 0x03, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0x38 ..= 0x3F. '8'-'9', ':'.

    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0x40 ..= 0x47.
    0x0F, 0x0B, 0x0F, 0x0F, 0x0F, 0x0F, 0x0B, 0x0F,  // 0x48 ..= 0x4F. 'I', 'N'.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0x50 ..= 0x57.
    0x0F, 0x0F, 0x0F, 0x07, 0x0F, 0x08, 0x0F, 0x0F,  // 0x58 ..= 0x5F. '[', ']'.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x09, 0x0F,  // 0x60 ..= 0x67. 'f'.
    0x0F, 0x0B, 0x0F, 0x0F, 0x0F, 0x0F, 0x0B, 0x0F,  // 0x68 ..= 0x6F. 'i', 'n'.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0A, 0x0F, 0x0F, 0x0F,  // 0x70 ..= 0x77. 't'.
    0x0F, 0x0F, 0x0F, 0x05, 0x0F, 0x06, 0x0F, 0x0F,  // 0x78 ..= 0x7F. '{', '}'.

    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0x80 ..= 0x87.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0x88 ..= 0x8F.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0x90 ..= 0x97.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0x98 ..= 0x9F.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xA0 ..= 0xA7.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xA8 ..= 0xAF.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xB0 ..= 0xB7.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xB8 ..= 0xBF.

    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xC0 ..= 0xC7.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xC8 ..= 0xCF.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xD0 ..= 0xD7.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xD8 ..= 0xDF.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xE0 ..= 0xE7.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xE8 ..= 0xEF.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xF0 ..= 0xF7.
    0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F,  // 0xF8 ..= 0xFF.
    // 0     1     2     3     4     5     6     7
    // 8     9     A     B     C     D     E     F
]

// LUT_DECIMAL_DIGITS[i] is non-zero if and only if i is an ASCII decimal
// digit, '0' ..= '9'.
//
// For such an i, the element's 0x80 bit is set and its low 4 bits are the
// digit's numerical value. For example, LUT_DECIMAL_DIGITS['7'] is (0x80 |
// 0x07). The 0x80 bit means that the element for '0' (whose value is zero) is
// still distinguishable from the zero elements of non-digit bytes.
pri const LUT_DECIMAL_DIGITS : array[256] base.u8 = [
    // 0     1     2     3     4     5     6     7
    // 8     9     A     B     C     D     E     F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x00 ..= 0x07.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x08 ..= 0x0F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x10 ..= 0x17.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x18 ..= 0x1F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x20 ..= 0x27.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x28 ..= 0x2F.
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,  // 0x30 ..= 0x37. '0'-'7'.
    0x88, 0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x38 ..= 0x3F. '8'-'9'.

    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x40 ..= 0x47.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x48 ..= 0x4F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x50 ..= 0x57.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x58 ..= 0x5F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x60 ..= 0x67.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x68 ..= 0x6F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x70 ..= 0x77.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x78 ..= 0x7F.

    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x80 ..= 0x87.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x88 ..= 0x8F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x90 ..= 0x97.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x98 ..= 0x9F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xA0 ..= 0xA7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xA8 ..= 0xAF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xB0 ..= 0xB7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xB8 ..= 0xBF.

    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xC0 ..= 0xC7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xC8 ..= 0xCF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xD0 ..= 0xD7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xD8 ..= 0xDF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xE0 ..= 0xE7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xE8 ..= 0xEF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xF0 ..= 0xF7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xF8 ..= 0xFF.
    // 0     1     2     3     4     5     6     7
    // 8     9     A     B     C     D     E     F
]

// LUT_HEXADECIMAL_DIGITS[i] is non-zero if and only if i is an ASCII
// hexadecimal digit: '0' ..= '9', 'A' ..= 'F' or 'a' ..= 'f'.
//
// For such an i, the element's 0x80 bit is set and its low 4 bits are the
// digit's numerical value. Upper and lower case letters map to the same
// values. For example, LUT_HEXADECIMAL_DIGITS['C'] and
// LUT_HEXADECIMAL_DIGITS['c'] are both (0x80 | 0x0C). The 0x80 bit means that
// the element for '0' (whose value is zero) is still distinguishable from the
// zero elements of non-digit bytes.
pri const LUT_HEXADECIMAL_DIGITS : array[256] base.u8 = [
    // 0     1     2     3     4     5     6     7
    // 8     9     A     B     C     D     E     F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x00 ..= 0x07.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x08 ..= 0x0F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x10 ..= 0x17.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x18 ..= 0x1F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x20 ..= 0x27.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x28 ..= 0x2F.
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,  // 0x30 ..= 0x37. '0'-'7'.
    0x88, 0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x38 ..= 0x3F. '8'-'9'.

    0x00, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x00,  // 0x40 ..= 0x47. 'A'-'F'.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x48 ..= 0x4F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x50 ..= 0x57.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x58 ..= 0x5F.
    0x00, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x00,  // 0x60 ..= 0x67. 'a'-'f'.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x68 ..= 0x6F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x70 ..= 0x77.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x78 ..= 0x7F.

    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x80 ..= 0x87.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x88 ..= 0x8F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x90 ..= 0x97.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x98 ..= 0x9F.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xA0 ..= 0xA7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xA8 ..= 0xAF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xB0 ..= 0xB7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xB8 ..= 0xBF.

    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xC0 ..= 0xC7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xC8 ..= 0xCF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xD0 ..= 0xD7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xD8 ..= 0xDF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xE0 ..= 0xE7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xE8 ..= 0xEF.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xF0 ..= 0xF7.
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0xF8 ..= 0xFF.
    // 0     1     2     3     4     5     6     7
    // 8     9     A     B     C     D     E     F
]