/****************************************************************************
**
** Copyright (C) 2017 Intel Corporation
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and associated documentation files (the "Software"), to deal
** in the Software without restriction, including without limitation the rights
** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
** copies of the Software, and to permit persons to whom the Software is
** furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Software.
**
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
** THE SOFTWARE.
**
****************************************************************************/
24
25#define _BSD_SOURCE 1
26#define _DEFAULT_SOURCE 1
27#ifndef __STDC_LIMIT_MACROS
28# define __STDC_LIMIT_MACROS 1
29#endif
30
31#include "cbor.h"
32#include "cborinternal_p.h"
33#include "compilersupport_p.h"
Thiago Macieira17d42a12017-02-26 14:42:37 -080034#include "utf8_p.h"
Thiago Macieiraaddf8042017-02-26 11:37:06 -080035
36#include <string.h>
37
38#ifndef CBOR_NO_FLOATING_POINT
39# include <float.h>
40# include <math.h>
41#endif
42
43
44#ifndef CBOR_PARSER_MAX_RECURSIONS
45# define CBOR_PARSER_MAX_RECURSIONS 1024
46#endif
47
/**
 * \addtogroup CborParsing
 * @{
 */
52
/**
 * \enum CborValidationFlags
 * The CborValidationFlags enum contains flags that control the validation of a
 * CBOR stream.
 *
 * \value CborValidateBasic         Validates only the syntactic correctness of the stream.
 * \value CborValidateCanonical     Validates that the stream is in canonical format, according to
 *                                  RFC 7049 section 3.9.
 * \value CborValidateStrictMode    Performs strict validation, according to RFC 7049 section 3.10.
 * \value CborValidateStrictest     Attempt to perform the strictest validation we know of.
 *
 * \value CborValidateShortestIntegrals     (Canonical) Validate that integral numbers and lengths are
 *                                          encoded in their shortest form possible.
 * \value CborValidateShortestFloatingPoint (Canonical) Validate that floating-point numbers are encoded
 *                                          in their shortest form possible.
 * \value CborValidateShortestNumbers       (Canonical) Validate both integrals and floating-point numbers
 *                                          are in their shortest form possible.
 * \value CborValidateNoIndeterminateLength (Canonical) Validate that no string, array or map uses
 *                                          indeterminate length encoding.
 * \value CborValidateMapIsSorted           (Canonical & Strict mode) Validate that map keys appear in
 *                                          sorted order.
 * \value CborValidateMapKeysAreUnique      (Strict mode) Validate that map keys are unique.
 * \value CborValidateTagUse                (Strict mode) Validate that known tags are used with the
 *                                          correct types. This does not validate that the content of
 *                                          those types is syntactically correct.
 * \value CborValidateUtf8                  (Strict mode) Validate that text strings are appropriately
 *                                          encoded in UTF-8.
 * \value CborValidateMapKeysAreString      Validate that all map keys are text strings.
 * \value CborValidateNoUndefined           Validate that no elements of type "undefined" are present.
 * \value CborValidateNoTags                Validate that no tags are used.
 * \value CborValidateFiniteFloatingPoint   Validate that all floating point numbers are finite (no NaN or
 *                                          infinities are allowed).
 * \value CborValidateCompleteData          Validate that the stream is complete and there is no more data
 *                                          in the buffer.
 * \value CborValidateNoUnknownSimpleTypesSA Validate that all Standards Action simple types are registered
 *                                          with IANA.
 * \value CborValidateNoUnknownSimpleTypes  Validate that all simple types used are registered with IANA.
 * \value CborValidateNoUnknownTagsSA       Validate that all Standards Action tags are registered with IANA.
 * \value CborValidateNoUnknownTagsSR       Validate that all Standards Action and Specification Required tags
 *                                          are registered with IANA (see below for limitations).
 * \value CborValidateNoUnknownTags         Validate that all tags are registered with IANA
 *                                          (see below for limitations).
 *
 * \par Simple type registry
 * The CBOR specification requires that registration for use of the first 19
 * simple types must be done by way of Standards Action. The rest of the simple
 * types only require a specification. The official list can be obtained from
 * https://www.iana.org/assignments/cbor-simple-values/cbor-simple-values.xhtml.
 *
 * \par
 * There are no registered simple types recognized by this release of TinyCBOR
 * (beyond those defined by RFC 7049).
 *
 * \par Tag registry
 * The CBOR specification requires that registration for use of tags 0 to 23
 * must be done by way of Standards Action. Tags from 24 up to 255 only
 * require a specification. Finally, all other tags can be registered on a
 * first-come-first-served basis. The official list can be obtained from
 * https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml.
 *
 * \par
 * Given the variability of this list, TinyCBOR cannot recognize all tags
 * registered with IANA. Instead, the implementation only recognizes tags
 * that are backed by an RFC.
 *
 * \par
 * These are the tags known to the current TinyCBOR release:
<table>
  <tr>
    <th>Tag</th>
    <th>Data Item</th>
    <th>Semantics</th>
  </tr>
  <tr>
    <td>0</td>
    <td>UTF-8 text string</td>
    <td>Standard date/time string</td>
  </tr>
  <tr>
    <td>1</td>
    <td>integer</td>
    <td>Epoch-based date/time</td>
  </tr>
  <tr>
    <td>2</td>
    <td>byte string</td>
    <td>Positive bignum</td>
  </tr>
  <tr>
    <td>3</td>
    <td>byte string</td>
    <td>Negative bignum</td>
  </tr>
  <tr>
    <td>4</td>
    <td>array</td>
    <td>Decimal fraction</td>
  </tr>
  <tr>
    <td>5</td>
    <td>array</td>
    <td>Bigfloat</td>
  </tr>
  <tr>
    <td>21</td>
    <td>byte string, array, map</td>
    <td>Expected conversion to base64url encoding</td>
  </tr>
  <tr>
    <td>22</td>
    <td>byte string, array, map</td>
    <td>Expected conversion to base64 encoding</td>
  </tr>
  <tr>
    <td>23</td>
    <td>byte string, array, map</td>
    <td>Expected conversion to base16 encoding</td>
  </tr>
  <tr>
    <td>24</td>
    <td>byte string</td>
    <td>Encoded CBOR data item</td>
  </tr>
  <tr>
    <td>32</td>
    <td>UTF-8 text string</td>
    <td>URI</td>
  </tr>
  <tr>
    <td>33</td>
    <td>UTF-8 text string</td>
    <td>base64url</td>
  </tr>
  <tr>
    <td>34</td>
    <td>UTF-8 text string</td>
    <td>base64</td>
  </tr>
  <tr>
    <td>35</td>
    <td>UTF-8 text string</td>
    <td>Regular expression</td>
  </tr>
  <tr>
    <td>36</td>
    <td>UTF-8 text string</td>
    <td>MIME message</td>
  </tr>
  <tr>
    <td>55799</td>
    <td>any</td>
    <td>Self-describe CBOR</td>
  </tr>
</table>
 */
208
/* One entry of the known-tag table used by validate_tag(). */
struct KnownTagData {
    uint32_t tag;       /* tag number */
    uint32_t types;     /* allowed item types, one per byte; 0 means no restriction */
};
Thiago Macieira0342d722017-02-26 13:12:50 -0800210static const struct KnownTagData knownTagData[] = {
Thiago Macieirac85bcf52017-03-06 09:04:40 +0100211 { 0, (uint8_t)CborTextStringType },
212 { 1, (uint8_t)(CborIntegerType+1) },
213 { 2, (uint8_t)CborByteStringType },
214 { 3, (uint8_t)CborByteStringType },
215 { 4, (uint8_t)CborArrayType },
216 { 5, (uint8_t)CborArrayType },
217 { 21, (uint8_t)CborByteStringType | ((uint8_t)CborArrayType << 8) | ((uint8_t)CborMapType << 16) },
218 { 22, (uint8_t)CborByteStringType | ((uint8_t)CborArrayType << 8) | ((uint8_t)CborMapType << 16) },
219 { 23, (uint8_t)CborByteStringType | ((uint8_t)CborArrayType << 8) | ((uint8_t)CborMapType << 16) },
220 { 24, (uint8_t)CborByteStringType },
221 { 32, (uint8_t)CborTextStringType },
222 { 33, (uint8_t)CborTextStringType },
223 { 34, (uint8_t)CborTextStringType },
224 { 35, (uint8_t)CborTextStringType },
225 { 36, (uint8_t)CborTextStringType },
226 { 55799, 0U }
Thiago Macieira0342d722017-02-26 13:12:50 -0800227};
228
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800229static CborError validate_value(CborValue *it, int flags, int recursionLeft);
230
Thiago Macieira17d42a12017-02-26 14:42:37 -0800231static inline CborError validate_utf8_string(const void *ptr, size_t n)
232{
233 const uint8_t *buffer = (const uint8_t *)ptr;
234 const uint8_t * const end = buffer + n;
235 while (buffer < end) {
236 uint32_t uc = get_utf8(&buffer, end);
237 if (uc == ~0U)
238 return CborErrorInvalidUtf8TextString;
239 }
240 return CborNoError;
241}
242
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800243static inline CborError validate_simple_type(uint8_t simple_type, int flags)
244{
245 /* At current time, all known simple types are those from RFC 7049,
246 * which are parsed by the parser into different CBOR types.
247 * That means that if we've got here, the type is unknown */
248 if (simple_type < 32)
249 return (flags & CborValidateNoUnknownSimpleTypesSA) ? CborErrorUnknownSimpleType : CborNoError;
250 return (flags & CborValidateNoUnknownSimpleTypes) == CborValidateNoUnknownSimpleTypes ?
251 CborErrorUnknownSimpleType : CborNoError;
252}
253
Thiago Macieira63d143b2017-02-26 17:43:50 -0800254static inline CborError validate_number(const CborValue *it, CborType type, int flags)
255{
256 CborError err = CborNoError;
257 const uint8_t *ptr = it->ptr;
258 uint64_t value;
259
260 if ((flags & CborValidateShortestIntegrals) == 0)
261 return err;
262 if (type >= CborHalfFloatType && type <= CborDoubleType)
263 return err; /* checked elsewhere */
264
265 err = _cbor_value_extract_number(&ptr, it->parser->end, &value);
266 if (err)
267 return err;
268
269 size_t bytesUsed = (size_t)(ptr - it->ptr - 1);
270 size_t bytesNeeded = 0;
271 if (value >= Value8Bit)
272 ++bytesNeeded;
273 if (value > 0xffU)
274 ++bytesNeeded;
275 if (value > 0xffffU)
276 bytesNeeded += 2;
277 if (value > 0xffffffffU)
278 bytesNeeded += 4;
279 if (bytesNeeded < bytesUsed)
280 return CborErrorOverlongEncoding;
281 return CborNoError;
282}
283
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800284static inline CborError validate_tag(CborValue *it, CborTag tag, int flags, int recursionLeft)
285{
286 CborType type = cbor_value_get_type(it);
Thiago Macieira0342d722017-02-26 13:12:50 -0800287 const size_t knownTagCount = sizeof(knownTagData) / sizeof(knownTagData[0]);
288 const struct KnownTagData *tagData = knownTagData;
289 const struct KnownTagData * const knownTagDataEnd = knownTagData + knownTagCount;
290
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800291 if (!recursionLeft)
292 return CborErrorNestingTooDeep;
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800293 if (flags & CborValidateNoTags)
294 return CborErrorExcludedType;
Thiago Macieira0342d722017-02-26 13:12:50 -0800295
296 /* find the tag data, if any */
297 for ( ; tagData != knownTagDataEnd; ++tagData) {
298 if (tagData->tag < tag)
299 continue;
300 if (tagData->tag > tag)
301 tagData = NULL;
302 break;
303 }
304 if (tagData == knownTagDataEnd)
305 tagData = NULL;
306
307 if (flags & CborValidateNoUnknownTags && !tagData) {
308 /* tag not found */
309 if (flags & CborValidateNoUnknownTagsSA && tag < 24)
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800310 return CborErrorUnknownTag;
Thiago Macieira0342d722017-02-26 13:12:50 -0800311 if ((flags & CborValidateNoUnknownTagsSR) == CborValidateNoUnknownTagsSR && tag < 256)
312 return CborErrorUnknownTag;
313 if ((flags & CborValidateNoUnknownTags) == CborValidateNoUnknownTags)
314 return CborErrorUnknownTag;
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800315 }
316
Thiago Macieirac85bcf52017-03-06 09:04:40 +0100317 if (flags & CborValidateTagUse && tagData && tagData->types) {
318 uint32_t allowedTypes = tagData->types;
319
320 /* correct Integer so it's not zero */
321 if (type == CborIntegerType)
322 ++type;
323
324 while (allowedTypes) {
325 if ((uint8_t)(allowedTypes & 0xff) == type)
326 break;
327 allowedTypes >>= 8;
328 }
329 if (!allowedTypes)
330 return CborErrorInappropriateTagForType;
331 }
332
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800333 return validate_value(it, flags, recursionLeft);
334}
335
336#ifndef CBOR_NO_FLOATING_POINT
337static inline CborError validate_floating_point(CborValue *it, CborType type, int flags)
338{
339 CborError err;
340 double val;
341 float valf;
342 uint16_t valf16;
343
344 if (type != CborDoubleType) {
345 if (type == CborFloatType) {
346 err = cbor_value_get_float(it, &valf);
347 val = valf;
348 } else {
349# ifdef CBOR_NO_HALF_FLOAT_TYPE
350 (void)val16;
351 return CborErrorUnsupportedType;
352# else
353 err = cbor_value_get_half_float(it, &valf16);
354 val = decode_half(valf16);
355# endif
356 }
357 } else {
358 err = cbor_value_get_double(it, &val);
359 }
360 cbor_assert(err == CborNoError); /* can't fail */
361
362 int r = fpclassify(val);
363 if (r == FP_NAN || r == FP_INFINITE) {
364 if (flags & CborValidateFiniteFloatingPoint)
365 return CborErrorExcludedValue;
366 if (flags & CborValidateShortestFloatingPoint) {
367 if (type == CborDoubleType)
368 return CborErrorOverlongEncoding;
369# ifndef CBOR_NO_HALF_FLOAT_TYPE
370 if (type == CborFloatType)
371 return CborErrorOverlongEncoding;
372 if (r == FP_NAN && valf16 != 0x7e00)
373 return CborErrorImproperValue;
374 if (r == FP_INFINITE && valf16 != 0x7c00 && valf16 != 0xfc00)
375 return CborErrorImproperValue;
376# endif
377 }
378 }
379
380 if (flags & CborValidateShortestFloatingPoint && type > CborHalfFloatType) {
381 if (type == CborDoubleType) {
382 valf = (float)val;
383 if ((double)valf == val)
384 return CborErrorOverlongEncoding;
385 }
386# ifndef CBOR_NO_HALF_FLOAT_TYPE
387 if (type == CborFloatType) {
388 valf16 = encode_half(valf);
389 if (valf == decode_half(valf16))
390 return CborErrorOverlongEncoding;
391 }
392# endif
393 }
394
395 return CborNoError;
396}
397#endif
398
399static CborError validate_container(CborValue *it, int containerType, int flags, int recursionLeft)
400{
401 CborError err;
Thiago Macieirad9a8d832017-02-26 18:37:55 -0800402 const uint8_t *previous = NULL;
403 const uint8_t *previous_end;
404
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800405 if (!recursionLeft)
406 return CborErrorNestingTooDeep;
407
408 while (!cbor_value_at_end(it)) {
Thiago Macieirad9a8d832017-02-26 18:37:55 -0800409 const uint8_t *current;
410
411 if (containerType == CborMapType) {
412 current = it->ptr;
413 if (flags & CborValidateMapKeysAreString) {
414 CborType type = cbor_value_get_type(it);
415 if (type == CborTagType) {
416 /* skip the tags */
417 CborValue copy = *it;
418 err = cbor_value_skip_tag(&copy);
419 if (err)
420 return err;
421 type = cbor_value_get_type(&copy);
422 }
423 if (type != CborTextStringType)
424 return CborErrorMapKeyNotString;
425 }
426 }
427
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800428 err = validate_value(it, flags, recursionLeft);
429 if (err)
430 return err;
431
Thiago Macieirad9a8d832017-02-26 18:37:55 -0800432 if (containerType != CborMapType)
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800433 continue;
434
Thiago Macieirad9a8d832017-02-26 18:37:55 -0800435 if (flags & CborValidateMapIsSorted) {
436 if (previous) {
437 uint64_t len1, len2;
438 const uint8_t *ptr;
439
440 /* extract the two lengths */
441 ptr = previous;
442 _cbor_value_extract_number(&ptr, it->parser->end, &len1);
443 ptr = current;
444 _cbor_value_extract_number(&ptr, it->parser->end, &len2);
445
446 if (len1 > len2)
447 return CborErrorMapNotSorted;
448 if (len1 == len2) {
449 size_t bytelen1 = (size_t)(previous_end - previous);
450 size_t bytelen2 = (size_t)(it->ptr - current);
451 int r = memcmp(previous, current, bytelen1 <= bytelen2 ? bytelen1 : bytelen2);
452
453 if (r == 0 && bytelen1 != bytelen2)
454 r = bytelen1 < bytelen2 ? -1 : +1;
455 if (r > 0)
456 return CborErrorMapNotSorted;
457 if (r == 0 && (flags & CborValidateMapKeysAreUnique) == CborValidateMapKeysAreUnique)
458 return CborErrorMapKeysNotUnique;
459 }
460 }
461
462 previous = current;
463 previous_end = it->ptr;
464 }
465
466 /* map: that was the key, so get the value */
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800467 err = validate_value(it, flags, recursionLeft);
468 if (err)
469 return err;
470 }
471 return CborNoError;
472}
473
474static CborError validate_value(CborValue *it, int flags, int recursionLeft)
475{
476 CborError err;
Thiago Macieira63d143b2017-02-26 17:43:50 -0800477 CborType type = cbor_value_get_type(it);
478
479 if (cbor_value_is_length_known(it)) {
480 err = validate_number(it, type, flags);
481 if (err)
482 return err;
483 } else {
484 if (flags & CborValidateNoIndeterminateLength)
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800485 return CborErrorUnknownLength;
486 }
487
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800488 switch (type) {
489 case CborArrayType:
490 case CborMapType: {
491 /* recursive type */
492 CborValue recursed;
493 err = cbor_value_enter_container(it, &recursed);
494 if (!err)
495 err = validate_container(&recursed, type, flags, recursionLeft - 1);
496 if (err) {
497 it->ptr = recursed.ptr;
498 return err;
499 }
500 err = cbor_value_leave_container(it, &recursed);
501 if (err)
502 return err;
503 return CborNoError;
504 }
505
506 case CborIntegerType: {
507 uint64_t val;
508 err = cbor_value_get_raw_integer(it, &val);
509 cbor_assert(err == CborNoError); /* can't fail */
510
511 break;
512 }
513
514 case CborByteStringType:
515 case CborTextStringType: {
516 size_t n = 0;
517 const void *ptr;
518
Thiago Macieira63d143b2017-02-26 17:43:50 -0800519 err = _cbor_value_prepare_string_iteration(it);
520 if (err)
521 return err;
522
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800523 while (1) {
Thiago Macieira63d143b2017-02-26 17:43:50 -0800524 err = validate_number(it, type, flags);
525 if (err)
526 return err;
527
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800528 err = _cbor_value_get_string_chunk(it, &ptr, &n, it);
529 if (err)
530 return err;
531 if (!ptr)
532 break;
Thiago Macieira17d42a12017-02-26 14:42:37 -0800533
534 if (type == CborTextStringType && flags & CborValidateUtf8) {
535 err = validate_utf8_string(ptr, n);
536 if (err)
537 return err;
538 }
Thiago Macieiraaddf8042017-02-26 11:37:06 -0800539 }
540
541 return CborNoError;
542 }
543
544 case CborTagType: {
545 CborTag tag;
546 err = cbor_value_get_tag(it, &tag);
547 cbor_assert(err == CborNoError); /* can't fail */
548
549 err = cbor_value_advance_fixed(it);
550 if (err)
551 return err;
552 err = validate_tag(it, tag, flags, recursionLeft - 1);
553 if (err)
554 return err;
555
556 return CborNoError;
557 }
558
559 case CborSimpleType: {
560 uint8_t simple_type;
561 err = cbor_value_get_simple_type(it, &simple_type);
562 cbor_assert(err == CborNoError); /* can't fail */
563 err = validate_simple_type(simple_type, flags);
564 if (err)
565 return err;
566 break;
567 }
568
569 case CborNullType:
570 case CborBooleanType:
571 break;
572
573 case CborUndefinedType:
574 if (flags & CborValidateNoUndefined)
575 return CborErrorExcludedType;
576 break;
577
578 case CborHalfFloatType:
579 case CborFloatType:
580 case CborDoubleType: {
581#ifdef CBOR_NO_FLOATING_POINT
582 return CborErrorUnsupportedType;
583#else
584 err = validate_floating_point(it, type, flags);
585 if (err)
586 return err;
587 break;
588 }
589#endif /* !CBOR_NO_FLOATING_POINT */
590
591 case CborInvalidType:
592 return CborErrorUnknownType;
593 }
594
595 err = cbor_value_advance_fixed(it);
596 return err;
597}
598
599/**
600 * Performs a full validation controlled by the \a flags options of the CBOR
601 * stream pointed by \a it and returns the error it found. If no error was
602 * found, it returns CborNoError and the application can iterate over the items
603 * with certainty that no other errors will appear during parsing.
604 *
605 * If \a flags is CborValidateBasic, the result should be the same as
606 * cbor_value_validate_basic().
607 *
608 * This function has the same timing and memory requirements as
609 * cbor_value_advance() and cbor_value_validate_basic().
610 *
611 * \sa CborValidationFlags, cbor_value_validate_basic(), cbor_value_advance()
612 */
613CborError cbor_value_validate(const CborValue *it, int flags)
614{
615 CborValue value = *it;
616 CborError err = validate_value(&value, flags, CBOR_PARSER_MAX_RECURSIONS);
617 if (err)
618 return err;
619 if (flags & CborValidateCompleteData && it->ptr != it->parser->end)
620 return CborErrorGarbageAtEnd;
621 return CborNoError;
622}
623
/**
 * @}
 */