blob: 76f8b6276eb410d028b379b1054ea37ee84c8c1d [file] [log] [blame]
Nigel Tao737e31f2020-02-11 11:23:17 +11001// After editing this file, run "go generate" in the parent directory.
2
3// Copyright 2020 The Wuffs Authors.
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// https://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17// ---------------- Tokens
18
// wuffs_base__token is a single 64-bit repr that packs a value (itself split
// into a major part and a minor part) together with a length.
typedef struct {
  // The repr is divided as:
  //  - Bits 63 .. 40 (24 bits) is the major value.
  //  - Bits 39 .. 16 (24 bits) is the minor value.
  //  - Bits 15 ..  0 (16 bits) is the length.
  //
  // The major value is a [Base38](doc/note/base38-and-fourcc.md) value. If
  // zero (special cased for Wuffs' built-in "base" package) then the minor
  // value is further sub-divided:
  //  - Bits 39 .. 37 ( 3 bits) is the value_base_category.
  //  - Bits 36 .. 16 (21 bits) is the value_base_detail.
  //
  // In particular, at 21 bits, the value_base_detail can hold every valid
  // Unicode code point.
  //
  // If the major value is non-zero then the minor value has whatever arbitrary
  // meaning the tokenizer's package assigns to it.
  uint64_t repr;

#ifdef __cplusplus
  // C++ accessor methods, one per field of the repr. They are only declared
  // here; the definitions appear later in this file, where each one forwards
  // to the wuffs_base__token__ETC function of the corresponding name.
  inline uint64_t value() const;
  inline uint64_t value_major() const;
  inline uint64_t value_minor() const;
  inline uint64_t value_base_category() const;
  inline uint64_t value_base_detail() const;
  inline uint64_t length() const;
#endif // __cplusplus

} wuffs_base__token;
48
49static inline wuffs_base__token //
50wuffs_base__make_token(uint64_t repr) {
51 wuffs_base__token ret;
52 ret.repr = repr;
53 return ret;
54}
55
// Field masks. The accessor functions in this file compute each field as
// ((repr >> ETC__SHIFT) & ETC__MASK), so every mask is expressed relative to
// bit 0, i.e. as it applies after the shift.
//
// NOTE(review): VALUE_BASE_CATEGORY__MASK is 27 bits wide (0x7FFFFFF), not the
// 3 bits that the repr layout assigns to the value_base_category. Together
// with its shift of 37 it keeps all of bits 63 .. 37, so the major value ends
// up in the upper bits of the result. This looks deliberate (a token with a
// non-zero major value then never compares equal to one of the small VBC
// constants) - confirm before narrowing it to 0x7.
#define WUFFS_BASE__TOKEN__VALUE__MASK 0xFFFFFFFFFFFF
#define WUFFS_BASE__TOKEN__VALUE_MAJOR__MASK 0xFFFFFF
#define WUFFS_BASE__TOKEN__VALUE_MINOR__MASK 0xFFFFFF
#define WUFFS_BASE__TOKEN__VALUE_BASE_CATEGORY__MASK 0x7FFFFFF
#define WUFFS_BASE__TOKEN__VALUE_BASE_DETAIL__MASK 0x1FFFFF
#define WUFFS_BASE__TOKEN__LENGTH__MASK 0xFFFF

// Field shifts: the bit position, within the repr, of each field's least
// significant bit.
#define WUFFS_BASE__TOKEN__VALUE__SHIFT 16
#define WUFFS_BASE__TOKEN__VALUE_MAJOR__SHIFT 40
#define WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT 16
#define WUFFS_BASE__TOKEN__VALUE_BASE_CATEGORY__SHIFT 37
#define WUFFS_BASE__TOKEN__VALUE_BASE_DETAIL__SHIFT 16
#define WUFFS_BASE__TOKEN__LENGTH__SHIFT 0
69
// VBC is short for value_base_category. These are the category numbers used
// when a token's major value is zero. All of them are below 8, so they fit in
// the 3 bits (bits 39 .. 37 of the repr) reserved for the category.
#define WUFFS_BASE__TOKEN__VBC__FILLER 0
#define WUFFS_BASE__TOKEN__VBC__STRING 1
#define WUFFS_BASE__TOKEN__VBC__BYTES 2
#define WUFFS_BASE__TOKEN__VBC__STRUCTURE 3
#define WUFFS_BASE__TOKEN__VBC__NUMBER 4
#define WUFFS_BASE__TOKEN__VBC__UNICODE_CODE_POINT 5
76
// VBD is short for value_base_detail. The constants below are single-bit
// flags; going by their names, each group applies to tokens whose
// value_base_category is the matching VBC constant (FILLER, STRING or BYTES).
//
// In all three groups, bit 0x1 is the INCOMPLETE flag and the group-specific
// flags start at bit 0x10.

// Bits 0x2, 0x4 and 0x8 are reserved for flags that are common between
// VBD_FILLER, VBD_STRING and VBD_BYTES.
#define WUFFS_BASE__TOKEN__VBD__FILLER__INCOMPLETE 0x00001
#define WUFFS_BASE__TOKEN__VBD__FILLER__END_OF_CONSECUTIVE_COMMENTS 0x00010
#define WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_LINE 0x00020
#define WUFFS_BASE__TOKEN__VBD__FILLER__COMMENT_BLOCK 0x00040

// Bits 0x2, 0x4 and 0x8 are reserved for flags that are common between
// VBD_FILLER, VBD_STRING and VBD_BYTES.
#define WUFFS_BASE__TOKEN__VBD__STRING__INCOMPLETE 0x00001
#define WUFFS_BASE__TOKEN__VBD__STRING__ALL_ASCII 0x00010

// Bits 0x2, 0x4 and 0x8 are reserved for flags that are common between
// VBD_FILLER, VBD_STRING and VBD_BYTES.
#define WUFFS_BASE__TOKEN__VBD__BYTES__INCOMPLETE 0x00001
// "D_DST_S_SRC" means that it takes S source bytes (possibly padded) to
// produce D destination bytes. For example,
// WUFFS_BASE__TOKEN__VBD__BYTES__1_DST_4_SRC_BACKSLASH_X means that the source
// looks like "\\x23\\x67\\xAB", where 12 src bytes encode 3 dst bytes.
#define WUFFS_BASE__TOKEN__VBD__BYTES__1_DST_1_SRC_RAW 0x00010
#define WUFFS_BASE__TOKEN__VBD__BYTES__1_DST_2_SRC_HEX 0x00020
#define WUFFS_BASE__TOKEN__VBD__BYTES__1_DST_4_SRC_BACKSLASH_X 0x00040
#define WUFFS_BASE__TOKEN__VBD__BYTES__1_DST_6_SRC_BACKSLASH_U 0x00080
#define WUFFS_BASE__TOKEN__VBD__BYTES__3_DST_4_SRC_BASE_64_STD 0x00100
#define WUFFS_BASE__TOKEN__VBD__BYTES__3_DST_4_SRC_BASE_64_URL 0x00200
#define WUFFS_BASE__TOKEN__VBD__BYTES__4_DST_5_SRC_ASCII_85 0x00400
103
// value_base_detail flags for structure tokens. The three groups use disjoint
// bits: PUSH / POP are 0x1 and 0x2, the FROM_ETC flags are 0x10 .. 0x40 and
// the TO_ETC flags are 0x1000 .. 0x4000.
// NOTE(review): presumably a token carries one flag from each group (the
// direction, the containing structure and the structure being entered or
// returned to) - confirm against a tokenizer that emits these.
#define WUFFS_BASE__TOKEN__VBD__STRUCTURE__PUSH 0x00001
#define WUFFS_BASE__TOKEN__VBD__STRUCTURE__POP 0x00002
#define WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_NONE 0x00010
#define WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_LIST 0x00020
#define WUFFS_BASE__TOKEN__VBD__STRUCTURE__FROM_DICT 0x00040
#define WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_NONE 0x01000
#define WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_LIST 0x02000
#define WUFFS_BASE__TOKEN__VBD__STRUCTURE__TO_DICT 0x04000

// value_base_detail values for number tokens. Each specific literal constant
// (UNDEFINED, NULL, FALSE, TRUE) includes the generic LITERAL bit (0x00001)
// plus one distinguishing bit in the range 0x00100 .. 0x00800.
#define WUFFS_BASE__TOKEN__VBD__NUMBER__LITERAL 0x00001
#define WUFFS_BASE__TOKEN__VBD__NUMBER__LITERAL__UNDEFINED 0x00101
#define WUFFS_BASE__TOKEN__VBD__NUMBER__LITERAL__NULL 0x00201
#define WUFFS_BASE__TOKEN__VBD__NUMBER__LITERAL__FALSE 0x00401
#define WUFFS_BASE__TOKEN__VBD__NUMBER__LITERAL__TRUE 0x00801
// For a source string of "123" or "0x9A", it is valid for a tokenizer to
// return any one of:
//  - WUFFS_BASE__TOKEN__VBD__NUMBER__FLOATING_POINT.
//  - WUFFS_BASE__TOKEN__VBD__NUMBER__INTEGER_SIGNED.
//  - WUFFS_BASE__TOKEN__VBD__NUMBER__INTEGER_UNSIGNED.
//
// For a source string of "+123" or "-0x9A", only the first two are valid.
//
// For a source string of "123.", only the first one is valid.
#define WUFFS_BASE__TOKEN__VBD__NUMBER__FLOATING_POINT 0x00002
#define WUFFS_BASE__TOKEN__VBD__NUMBER__INTEGER_SIGNED 0x00004
#define WUFFS_BASE__TOKEN__VBD__NUMBER__INTEGER_UNSIGNED 0x00008

// The inclusive upper bound for a Unicode code point held in the
// value_base_detail. It is below the 21-bit VALUE_BASE_DETAIL__MASK
// (0x1FFFFF), so every value up to this bound fits in that field.
#define WUFFS_BASE__TOKEN__VBD__UNICODE_CODE_POINT__MAX_INCL 0x10FFFF
132
Nigel Tao36857982020-02-12 11:33:13 +1100133static inline uint64_t //
134wuffs_base__token__value(const wuffs_base__token* t) {
135 return (t->repr >> WUFFS_BASE__TOKEN__VALUE__SHIFT) &
136 WUFFS_BASE__TOKEN__VALUE__MASK;
137}
138
139static inline uint64_t //
140wuffs_base__token__value_major(const wuffs_base__token* t) {
141 return (t->repr >> WUFFS_BASE__TOKEN__VALUE_MAJOR__SHIFT) &
142 WUFFS_BASE__TOKEN__VALUE_MAJOR__MASK;
143}
144
145static inline uint64_t //
146wuffs_base__token__value_minor(const wuffs_base__token* t) {
147 return (t->repr >> WUFFS_BASE__TOKEN__VALUE_MINOR__SHIFT) &
148 WUFFS_BASE__TOKEN__VALUE_MINOR__MASK;
149}
150
151static inline uint64_t //
152wuffs_base__token__value_base_category(const wuffs_base__token* t) {
153 return (t->repr >> WUFFS_BASE__TOKEN__VALUE_BASE_CATEGORY__SHIFT) &
154 WUFFS_BASE__TOKEN__VALUE_BASE_CATEGORY__MASK;
155}
156
157static inline uint64_t //
158wuffs_base__token__value_base_detail(const wuffs_base__token* t) {
159 return (t->repr >> WUFFS_BASE__TOKEN__VALUE_BASE_DETAIL__SHIFT) &
160 WUFFS_BASE__TOKEN__VALUE_BASE_DETAIL__MASK;
161}
162
163static inline uint64_t //
164wuffs_base__token__length(const wuffs_base__token* t) {
165 return (t->repr >> WUFFS_BASE__TOKEN__LENGTH__SHIFT) &
166 WUFFS_BASE__TOKEN__LENGTH__MASK;
167}
168
#ifdef __cplusplus

// C++ member functions of wuffs_base__token. Each one is a thin wrapper that
// forwards to the free function of the corresponding name, passing this as
// the token pointer.

// Forwards to wuffs_base__token__value.
inline uint64_t //
wuffs_base__token::value() const {
  return wuffs_base__token__value(this);
}

// Forwards to wuffs_base__token__value_major.
inline uint64_t //
wuffs_base__token::value_major() const {
  return wuffs_base__token__value_major(this);
}

// Forwards to wuffs_base__token__value_minor.
inline uint64_t //
wuffs_base__token::value_minor() const {
  return wuffs_base__token__value_minor(this);
}

// Forwards to wuffs_base__token__value_base_category.
inline uint64_t //
wuffs_base__token::value_base_category() const {
  return wuffs_base__token__value_base_category(this);
}

// Forwards to wuffs_base__token__value_base_detail.
inline uint64_t //
wuffs_base__token::value_base_detail() const {
  return wuffs_base__token__value_base_detail(this);
}

// Forwards to wuffs_base__token__length.
inline uint64_t //
wuffs_base__token::length() const {
  return wuffs_base__token__length(this);
}

#endif // __cplusplus
202
203// --------
204
// wuffs_base__slice_token is the slice type, produced by the
// WUFFS_BASE__SLICE macro, whose element type is wuffs_base__token.
// NOTE(review): WUFFS_BASE__SLICE is defined outside this section; the code
// in this file relies on the resulting type having ptr and len fields.
typedef WUFFS_BASE__SLICE(wuffs_base__token) wuffs_base__slice_token;
206
207static inline wuffs_base__slice_token //
208wuffs_base__make_slice_token(wuffs_base__token* ptr, size_t len) {
209 wuffs_base__slice_token ret;
210 ret.ptr = ptr;
211 ret.len = len;
212 return ret;
213}
214
Nigel Tao36857982020-02-12 11:33:13 +1100215// --------
216
// wuffs_base__token_buffer_meta is the metadata for a
// wuffs_base__token_buffer's data.
//
// The indexes count tokens (elements of the buffer's data slice), not bytes.
// In the invariants below, len is the length of that data slice.
typedef struct {
  size_t wi; // Write index. Invariant: wi <= len.
  size_t ri; // Read index. Invariant: ri <= wi.
  uint64_t pos; // Position of the buffer start relative to the stream start.
  bool closed; // No further writes are expected.
} wuffs_base__token_buffer_meta;
225
// wuffs_base__token_buffer is a 1-dimensional buffer (a pointer and length)
// plus additional metadata.
//
// A value with all fields zero is a valid, empty buffer.
typedef struct {
  // The token storage: a pointer to the first token and the token count.
  wuffs_base__slice_token data;
  // The read and write indexes into data, the stream position of the start
  // of data, and the closed flag.
  wuffs_base__token_buffer_meta meta;

#ifdef __cplusplus
  // C++ convenience methods. They are only declared here; the definitions
  // appear later in this file, where each one forwards to the
  // wuffs_base__token_buffer__ETC function of the corresponding name.
  inline void compact();
  inline uint64_t reader_available() const;
  inline uint64_t reader_token_position() const;
  inline uint64_t writer_available() const;
  inline uint64_t writer_token_position() const;
#endif // __cplusplus

} wuffs_base__token_buffer;
243
244static inline wuffs_base__token_buffer //
245wuffs_base__make_token_buffer(wuffs_base__slice_token data,
246 wuffs_base__token_buffer_meta meta) {
247 wuffs_base__token_buffer ret;
248 ret.data = data;
249 ret.meta = meta;
250 return ret;
251}
252
253static inline wuffs_base__token_buffer_meta //
254wuffs_base__make_token_buffer_meta(size_t wi,
255 size_t ri,
256 uint64_t pos,
257 bool closed) {
258 wuffs_base__token_buffer_meta ret;
259 ret.wi = wi;
260 ret.ri = ri;
261 ret.pos = pos;
262 ret.closed = closed;
263 return ret;
264}
265
266static inline wuffs_base__token_buffer //
267wuffs_base__empty_token_buffer() {
268 wuffs_base__token_buffer ret;
269 ret.data.ptr = NULL;
270 ret.data.len = 0;
271 ret.meta.wi = 0;
272 ret.meta.ri = 0;
273 ret.meta.pos = 0;
274 ret.meta.closed = false;
275 return ret;
276}
277
278static inline wuffs_base__token_buffer_meta //
279wuffs_base__empty_token_buffer_meta() {
280 wuffs_base__token_buffer_meta ret;
281 ret.wi = 0;
282 ret.ri = 0;
283 ret.pos = 0;
284 ret.closed = false;
285 return ret;
286}
287
288// wuffs_base__token_buffer__compact moves any written but unread tokens to the
289// start of the buffer.
290static inline void //
291wuffs_base__token_buffer__compact(wuffs_base__token_buffer* buf) {
292 if (!buf || (buf->meta.ri == 0)) {
293 return;
294 }
295 buf->meta.pos = wuffs_base__u64__sat_add(buf->meta.pos, buf->meta.ri);
296 size_t n = buf->meta.wi - buf->meta.ri;
297 if (n != 0) {
298 memmove(buf->data.ptr, buf->data.ptr + buf->meta.ri,
299 n * sizeof(wuffs_base__token));
300 }
301 buf->meta.wi = n;
302 buf->meta.ri = 0;
303}
304
305static inline uint64_t //
306wuffs_base__token_buffer__reader_available(
307 const wuffs_base__token_buffer* buf) {
308 return buf ? buf->meta.wi - buf->meta.ri : 0;
309}
310
311static inline uint64_t //
312wuffs_base__token_buffer__reader_token_position(
313 const wuffs_base__token_buffer* buf) {
314 return buf ? wuffs_base__u64__sat_add(buf->meta.pos, buf->meta.ri) : 0;
315}
316
317static inline uint64_t //
318wuffs_base__token_buffer__writer_available(
319 const wuffs_base__token_buffer* buf) {
320 return buf ? buf->data.len - buf->meta.wi : 0;
321}
322
323static inline uint64_t //
324wuffs_base__token_buffer__writer_token_position(
325 const wuffs_base__token_buffer* buf) {
326 return buf ? wuffs_base__u64__sat_add(buf->meta.pos, buf->meta.wi) : 0;
327}
328
#ifdef __cplusplus

// C++ member functions of wuffs_base__token_buffer. Each one is a thin
// wrapper that forwards to the free function of the corresponding name,
// passing this as the buffer pointer.

// Forwards to wuffs_base__token_buffer__compact.
inline void //
wuffs_base__token_buffer::compact() {
  wuffs_base__token_buffer__compact(this);
}

// Forwards to wuffs_base__token_buffer__reader_available.
inline uint64_t //
wuffs_base__token_buffer::reader_available() const {
  return wuffs_base__token_buffer__reader_available(this);
}

// Forwards to wuffs_base__token_buffer__reader_token_position.
inline uint64_t //
wuffs_base__token_buffer::reader_token_position() const {
  return wuffs_base__token_buffer__reader_token_position(this);
}

// Forwards to wuffs_base__token_buffer__writer_available.
inline uint64_t //
wuffs_base__token_buffer::writer_available() const {
  return wuffs_base__token_buffer__writer_available(this);
}

// Forwards to wuffs_base__token_buffer__writer_token_position.
inline uint64_t //
wuffs_base__token_buffer::writer_token_position() const {
  return wuffs_base__token_buffer__writer_token_position(this);
}

#endif // __cplusplus