blob: b7cae9ee73e29bcf516f971b70977a7e47dbc445 [file] [log] [blame]
Thiago Macieiraaddf8042017-02-26 11:37:06 -08001/****************************************************************************
2**
3** Copyright (C) 2017 Intel Corporation
4**
5** Permission is hereby granted, free of charge, to any person obtaining a copy
6** of this software and associated documentation files (the "Software"), to deal
7** in the Software without restriction, including without limitation the rights
8** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9** copies of the Software, and to permit persons to whom the Software is
10** furnished to do so, subject to the following conditions:
11**
12** The above copyright notice and this permission notice shall be included in
13** all copies or substantial portions of the Software.
14**
15** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21** THE SOFTWARE.
22**
23****************************************************************************/
24
25#define _BSD_SOURCE 1
26#define _DEFAULT_SOURCE 1
27#ifndef __STDC_LIMIT_MACROS
28# define __STDC_LIMIT_MACROS 1
29#endif
30
31#include "cbor.h"
32#include "cborinternal_p.h"
33#include "compilersupport_p.h"
Thiago Macieira17d42a12017-02-26 14:42:37 -080034#include "utf8_p.h"
Thiago Macieiraaddf8042017-02-26 11:37:06 -080035
36#include <string.h>
37
38#ifndef CBOR_NO_FLOATING_POINT
39# include <float.h>
40# include <math.h>
41#endif
42
43
44#ifndef CBOR_PARSER_MAX_RECURSIONS
45# define CBOR_PARSER_MAX_RECURSIONS 1024
46#endif
47
48/**
49 * \addtogroup CborParsing
50 * @{
51 */
52
/**
 * \enum CborValidationFlags
 * The CborValidationFlags enum contains flags that control the validation of a
 * CBOR stream.
 *
 * \value CborValidateBasic                   Validates only the syntactic correctness of the stream.
 * \value CborValidateCanonical               Validates that the stream is in canonical format, according to
 *                                            RFC 7049 section 3.9.
 * \value CborValidateStrictMode              Performs strict validation, according to RFC 7049 section 3.10.
 * \value CborValidateStrictest               Attempt to perform the strictest validation we know of.
 *
 * \value CborValidateShortestIntegrals       (Canonical) Validate that integral numbers and lengths are
 *                                            encoded in their shortest form possible.
 * \value CborValidateShortestFloatingPoint   (Canonical) Validate that floating-point numbers are encoded
 *                                            in their shortest form possible.
 * \value CborValidateShortestNumbers         (Canonical) Validate both integrals and floating-point numbers
 *                                            are in their shortest form possible.
 * \value CborValidateNoIndeterminateLength   (Canonical) Validate that no string, array or map uses
 *                                            indeterminate length encoding.
 * \value CborValidateMapIsSorted             (Canonical & Strict mode) Validate that map keys appear in
 *                                            sorted order.
 * \value CborValidateMapKeysAreUnique        (Strict mode) Validate that map keys are unique.
 * \value CborValidateTagUse                  (Strict mode) Validate that known tags are used with the
 *                                            correct types. This does not validate that the content of
 *                                            those types is syntactically correct.
 * \value CborValidateUtf8                    (Strict mode) Validate that text strings are appropriately
 *                                            encoded in UTF-8.
 * \value CborValidateMapKeysAreString        Validate that all map keys are text strings.
 * \value CborValidateNoUndefined             Validate that no elements of type "undefined" are present.
 * \value CborValidateNoTags                  Validate that no tags are used.
 * \value CborValidateFiniteFloatingPoint     Validate that all floating point numbers are finite (no NaN or
 *                                            infinities are allowed).
 * \value CborValidateCompleteData            Validate that the stream is complete and there is no more data
 *                                            in the buffer.
 * \value CborValidateNoUnknownSimpleTypesSA  Validate that all Standards Action simple types are registered
 *                                            with IANA.
 * \value CborValidateNoUnknownSimpleTypes    Validate that all simple types used are registered with IANA.
 * \value CborValidateNoUnknownTagsSA         Validate that all Standards Action tags are registered with IANA.
 * \value CborValidateNoUnknownTagsSR         Validate that all Standards Action and Specification Required tags
 *                                            are registered with IANA (see below for limitations).
 * \value CborValidateNoUnknownTags           Validate that all tags are registered with IANA
 *                                            (see below for limitations).
 *
 * \par Simple type registry
 * The CBOR specification requires that registration for use of simple types
 * 0 to 19 must be done by way of Standards Action. The rest of the simple
 * types only require a specification. The official list can be obtained from
 * https://www.iana.org/assignments/cbor-simple-values/cbor-simple-values.xhtml.
 *
 * \par
 * There are no registered simple types recognized by this release of TinyCBOR
 * (beyond those defined by RFC 7049).
 *
 * \par Tag registry
 * The CBOR specification requires that registration for use of tags 0 to 23
 * must be done by way of Standards Action. The following tags, up to tag 255,
 * only require a specification. Finally, all other tags can be registered on a
 * first-come-first-served basis. The official list can be obtained from
 * https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml.
 *
 * \par
 * Given the variability of this list, TinyCBOR cannot recognize all tags
 * registered with IANA. Instead, the implementation only recognizes tags
 * that are backed by an RFC.
 *
 * \par
 * These are the tags known to the current TinyCBOR release:
<table>
  <tr>
    <th>Tag</th>
    <th>Data Item</th>
    <th>Semantics</th>
  </tr>
  <tr>
    <td>0</td>
    <td>UTF-8 text string</td>
    <td>Standard date/time string</td>
  </tr>
  <tr>
    <td>1</td>
    <td>integer</td>
    <td>Epoch-based date/time</td>
  </tr>
  <tr>
    <td>2</td>
    <td>byte string</td>
    <td>Positive bignum</td>
  </tr>
  <tr>
    <td>3</td>
    <td>byte string</td>
    <td>Negative bignum</td>
  </tr>
  <tr>
    <td>4</td>
    <td>array</td>
    <td>Decimal fraction</td>
  </tr>
  <tr>
    <td>5</td>
    <td>array</td>
    <td>Bigfloat</td>
  </tr>
  <tr>
    <td>21</td>
    <td>byte string, array, map</td>
    <td>Expected conversion to base64url encoding</td>
  </tr>
  <tr>
    <td>22</td>
    <td>byte string, array, map</td>
    <td>Expected conversion to base64 encoding</td>
  </tr>
  <tr>
    <td>23</td>
    <td>byte string, array, map</td>
    <td>Expected conversion to base16 encoding</td>
  </tr>
  <tr>
    <td>24</td>
    <td>byte string</td>
    <td>Encoded CBOR data item</td>
  </tr>
  <tr>
    <td>32</td>
    <td>UTF-8 text string</td>
    <td>URI</td>
  </tr>
  <tr>
    <td>33</td>
    <td>UTF-8 text string</td>
    <td>base64url</td>
  </tr>
  <tr>
    <td>34</td>
    <td>UTF-8 text string</td>
    <td>base64</td>
  </tr>
  <tr>
    <td>35</td>
    <td>UTF-8 text string</td>
    <td>Regular expression</td>
  </tr>
  <tr>
    <td>36</td>
    <td>UTF-8 text string</td>
    <td>MIME message</td>
  </tr>
  <tr>
    <td>55799</td>
    <td>any</td>
    <td>Self-describe CBOR</td>
  </tr>
</table>
 */
208
Thiago Macieirac85bcf52017-03-06 09:04:40 +0100209struct KnownTagData { uint32_t tag; uint32_t types; };
Thiago Macieira0342d722017-02-26 13:12:50 -0800210static const struct KnownTagData knownTagData[] = {
Thiago Macieirac85bcf52017-03-06 09:04:40 +0100211 { 0, (uint8_t)CborTextStringType },
212 { 1, (uint8_t)(CborIntegerType+1) },
213 { 2, (uint8_t)CborByteStringType },
214 { 3, (uint8_t)CborByteStringType },
215 { 4, (uint8_t)CborArrayType },
216 { 5, (uint8_t)CborArrayType },
217 { 21, (uint8_t)CborByteStringType | ((uint8_t)CborArrayType << 8) | ((uint8_t)CborMapType << 16) },
218 { 22, (uint8_t)CborByteStringType | ((uint8_t)CborArrayType << 8) | ((uint8_t)CborMapType << 16) },
219 { 23, (uint8_t)CborByteStringType | ((uint8_t)CborArrayType << 8) | ((uint8_t)CborMapType << 16) },
220 { 24, (uint8_t)CborByteStringType },
221 { 32, (uint8_t)CborTextStringType },
222 { 33, (uint8_t)CborTextStringType },
223 { 34, (uint8_t)CborTextStringType },
224 { 35, (uint8_t)CborTextStringType },
225 { 36, (uint8_t)CborTextStringType },
226 { 55799, 0U }
Thiago Macieira0342d722017-02-26 13:12:50 -0800227};
228
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800229static CborError validate_value(CborValue *it, int flags, int recursionLeft);
230
Thiago Macieira17d42a12017-02-26 14:42:37 -0800231static inline CborError validate_utf8_string(const void *ptr, size_t n)
232{
233 const uint8_t *buffer = (const uint8_t *)ptr;
234 const uint8_t * const end = buffer + n;
235 while (buffer < end) {
236 uint32_t uc = get_utf8(&buffer, end);
237 if (uc == ~0U)
238 return CborErrorInvalidUtf8TextString;
239 }
240 return CborNoError;
241}
242
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800243static inline CborError validate_simple_type(uint8_t simple_type, int flags)
244{
245 /* At current time, all known simple types are those from RFC 7049,
246 * which are parsed by the parser into different CBOR types.
247 * That means that if we've got here, the type is unknown */
248 if (simple_type < 32)
249 return (flags & CborValidateNoUnknownSimpleTypesSA) ? CborErrorUnknownSimpleType : CborNoError;
250 return (flags & CborValidateNoUnknownSimpleTypes) == CborValidateNoUnknownSimpleTypes ?
251 CborErrorUnknownSimpleType : CborNoError;
252}
253
Thiago Macieira63d143b2017-02-26 17:43:50 -0800254static inline CborError validate_number(const CborValue *it, CborType type, int flags)
255{
256 CborError err = CborNoError;
257 const uint8_t *ptr = it->ptr;
258 uint64_t value;
259
260 if ((flags & CborValidateShortestIntegrals) == 0)
261 return err;
262 if (type >= CborHalfFloatType && type <= CborDoubleType)
263 return err; /* checked elsewhere */
264
265 err = _cbor_value_extract_number(&ptr, it->parser->end, &value);
266 if (err)
267 return err;
268
269 size_t bytesUsed = (size_t)(ptr - it->ptr - 1);
270 size_t bytesNeeded = 0;
271 if (value >= Value8Bit)
272 ++bytesNeeded;
273 if (value > 0xffU)
274 ++bytesNeeded;
275 if (value > 0xffffU)
276 bytesNeeded += 2;
277 if (value > 0xffffffffU)
278 bytesNeeded += 4;
279 if (bytesNeeded < bytesUsed)
280 return CborErrorOverlongEncoding;
281 return CborNoError;
282}
283
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800284static inline CborError validate_tag(CborValue *it, CborTag tag, int flags, int recursionLeft)
285{
286 CborType type = cbor_value_get_type(it);
Thiago Macieira0342d722017-02-26 13:12:50 -0800287 const size_t knownTagCount = sizeof(knownTagData) / sizeof(knownTagData[0]);
288 const struct KnownTagData *tagData = knownTagData;
289 const struct KnownTagData * const knownTagDataEnd = knownTagData + knownTagCount;
290
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800291 if (!recursionLeft)
292 return CborErrorNestingTooDeep;
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800293 if (flags & CborValidateNoTags)
294 return CborErrorExcludedType;
Thiago Macieira0342d722017-02-26 13:12:50 -0800295
296 /* find the tag data, if any */
297 for ( ; tagData != knownTagDataEnd; ++tagData) {
298 if (tagData->tag < tag)
299 continue;
300 if (tagData->tag > tag)
301 tagData = NULL;
302 break;
303 }
304 if (tagData == knownTagDataEnd)
305 tagData = NULL;
306
307 if (flags & CborValidateNoUnknownTags && !tagData) {
308 /* tag not found */
309 if (flags & CborValidateNoUnknownTagsSA && tag < 24)
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800310 return CborErrorUnknownTag;
Thiago Macieira0342d722017-02-26 13:12:50 -0800311 if ((flags & CborValidateNoUnknownTagsSR) == CborValidateNoUnknownTagsSR && tag < 256)
312 return CborErrorUnknownTag;
313 if ((flags & CborValidateNoUnknownTags) == CborValidateNoUnknownTags)
314 return CborErrorUnknownTag;
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800315 }
316
Thiago Macieirac85bcf52017-03-06 09:04:40 +0100317 if (flags & CborValidateTagUse && tagData && tagData->types) {
318 uint32_t allowedTypes = tagData->types;
319
320 /* correct Integer so it's not zero */
321 if (type == CborIntegerType)
322 ++type;
323
324 while (allowedTypes) {
325 if ((uint8_t)(allowedTypes & 0xff) == type)
326 break;
327 allowedTypes >>= 8;
328 }
329 if (!allowedTypes)
330 return CborErrorInappropriateTagForType;
331 }
332
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800333 return validate_value(it, flags, recursionLeft);
334}
335
336#ifndef CBOR_NO_FLOATING_POINT
337static inline CborError validate_floating_point(CborValue *it, CborType type, int flags)
338{
339 CborError err;
340 double val;
341 float valf;
342 uint16_t valf16;
343
344 if (type != CborDoubleType) {
345 if (type == CborFloatType) {
346 err = cbor_value_get_float(it, &valf);
347 val = valf;
348 } else {
349# ifdef CBOR_NO_HALF_FLOAT_TYPE
350 (void)val16;
351 return CborErrorUnsupportedType;
352# else
353 err = cbor_value_get_half_float(it, &valf16);
354 val = decode_half(valf16);
355# endif
356 }
357 } else {
358 err = cbor_value_get_double(it, &val);
359 }
360 cbor_assert(err == CborNoError); /* can't fail */
361
362 int r = fpclassify(val);
363 if (r == FP_NAN || r == FP_INFINITE) {
364 if (flags & CborValidateFiniteFloatingPoint)
365 return CborErrorExcludedValue;
366 if (flags & CborValidateShortestFloatingPoint) {
367 if (type == CborDoubleType)
368 return CborErrorOverlongEncoding;
369# ifndef CBOR_NO_HALF_FLOAT_TYPE
370 if (type == CborFloatType)
371 return CborErrorOverlongEncoding;
372 if (r == FP_NAN && valf16 != 0x7e00)
373 return CborErrorImproperValue;
374 if (r == FP_INFINITE && valf16 != 0x7c00 && valf16 != 0xfc00)
375 return CborErrorImproperValue;
376# endif
377 }
378 }
379
380 if (flags & CborValidateShortestFloatingPoint && type > CborHalfFloatType) {
381 if (type == CborDoubleType) {
382 valf = (float)val;
383 if ((double)valf == val)
384 return CborErrorOverlongEncoding;
385 }
386# ifndef CBOR_NO_HALF_FLOAT_TYPE
387 if (type == CborFloatType) {
388 valf16 = encode_half(valf);
389 if (valf == decode_half(valf16))
390 return CborErrorOverlongEncoding;
391 }
392# endif
393 }
394
395 return CborNoError;
396}
397#endif
398
399static CborError validate_container(CborValue *it, int containerType, int flags, int recursionLeft)
400{
401 CborError err;
402 if (!recursionLeft)
403 return CborErrorNestingTooDeep;
404
405 while (!cbor_value_at_end(it)) {
406 err = validate_value(it, flags, recursionLeft);
407 if (err)
408 return err;
409
410 if (containerType == CborArrayType)
411 continue;
412
413 /* map: that was the key, so get he value */
414 err = validate_value(it, flags, recursionLeft);
415 if (err)
416 return err;
417 }
418 return CborNoError;
419}
420
421static CborError validate_value(CborValue *it, int flags, int recursionLeft)
422{
423 CborError err;
Thiago Macieira63d143b2017-02-26 17:43:50 -0800424 CborType type = cbor_value_get_type(it);
425
426 if (cbor_value_is_length_known(it)) {
427 err = validate_number(it, type, flags);
428 if (err)
429 return err;
430 } else {
431 if (flags & CborValidateNoIndeterminateLength)
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800432 return CborErrorUnknownLength;
433 }
434
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800435 switch (type) {
436 case CborArrayType:
437 case CborMapType: {
438 /* recursive type */
439 CborValue recursed;
440 err = cbor_value_enter_container(it, &recursed);
441 if (!err)
442 err = validate_container(&recursed, type, flags, recursionLeft - 1);
443 if (err) {
444 it->ptr = recursed.ptr;
445 return err;
446 }
447 err = cbor_value_leave_container(it, &recursed);
448 if (err)
449 return err;
450 return CborNoError;
451 }
452
453 case CborIntegerType: {
454 uint64_t val;
455 err = cbor_value_get_raw_integer(it, &val);
456 cbor_assert(err == CborNoError); /* can't fail */
457
458 break;
459 }
460
461 case CborByteStringType:
462 case CborTextStringType: {
463 size_t n = 0;
464 const void *ptr;
465
Thiago Macieira63d143b2017-02-26 17:43:50 -0800466 err = _cbor_value_prepare_string_iteration(it);
467 if (err)
468 return err;
469
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800470 while (1) {
Thiago Macieira63d143b2017-02-26 17:43:50 -0800471 err = validate_number(it, type, flags);
472 if (err)
473 return err;
474
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800475 err = _cbor_value_get_string_chunk(it, &ptr, &n, it);
476 if (err)
477 return err;
478 if (!ptr)
479 break;
Thiago Macieira17d42a12017-02-26 14:42:37 -0800480
481 if (type == CborTextStringType && flags & CborValidateUtf8) {
482 err = validate_utf8_string(ptr, n);
483 if (err)
484 return err;
485 }
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800486 }
487
488 return CborNoError;
489 }
490
491 case CborTagType: {
492 CborTag tag;
493 err = cbor_value_get_tag(it, &tag);
494 cbor_assert(err == CborNoError); /* can't fail */
495
496 err = cbor_value_advance_fixed(it);
497 if (err)
498 return err;
499 err = validate_tag(it, tag, flags, recursionLeft - 1);
500 if (err)
501 return err;
502
503 return CborNoError;
504 }
505
506 case CborSimpleType: {
507 uint8_t simple_type;
508 err = cbor_value_get_simple_type(it, &simple_type);
509 cbor_assert(err == CborNoError); /* can't fail */
510 err = validate_simple_type(simple_type, flags);
511 if (err)
512 return err;
513 break;
514 }
515
516 case CborNullType:
517 case CborBooleanType:
518 break;
519
520 case CborUndefinedType:
521 if (flags & CborValidateNoUndefined)
522 return CborErrorExcludedType;
523 break;
524
525 case CborHalfFloatType:
526 case CborFloatType:
527 case CborDoubleType: {
528#ifdef CBOR_NO_FLOATING_POINT
529 return CborErrorUnsupportedType;
530#else
531 err = validate_floating_point(it, type, flags);
532 if (err)
533 return err;
534 break;
535 }
536#endif /* !CBOR_NO_FLOATING_POINT */
537
538 case CborInvalidType:
539 return CborErrorUnknownType;
540 }
541
542 err = cbor_value_advance_fixed(it);
543 return err;
544}
545
546/**
547 * Performs a full validation controlled by the \a flags options of the CBOR
548 * stream pointed by \a it and returns the error it found. If no error was
549 * found, it returns CborNoError and the application can iterate over the items
550 * with certainty that no other errors will appear during parsing.
551 *
552 * If \a flags is CborValidateBasic, the result should be the same as
553 * cbor_value_validate_basic().
554 *
555 * This function has the same timing and memory requirements as
556 * cbor_value_advance() and cbor_value_validate_basic().
557 *
558 * \sa CborValidationFlags, cbor_value_validate_basic(), cbor_value_advance()
559 */
560CborError cbor_value_validate(const CborValue *it, int flags)
561{
562 CborValue value = *it;
563 CborError err = validate_value(&value, flags, CBOR_PARSER_MAX_RECURSIONS);
564 if (err)
565 return err;
566 if (flags & CborValidateCompleteData && it->ptr != it->parser->end)
567 return CborErrorGarbageAtEnd;
568 return CborNoError;
569}
570
571/**
572 * @}
573 */