Validation: Implement recognition of known tags

This implements:
 - CborValidateNoUnknownTagsSA
 - CborValidateNoUnknownTagsSR
 - CborValidateNoUnknownTags

This commit adds a Perl script that parses a machine-readable list of
known tags (also added in this commit). That allows us to more easily
update the known tag list when more tags are added by IANA to the
registry (see API documentation for the rationale on what tags we
recognise).

If there is interest, we can change the script that turns the tag list
into code so that implementations can choose, at compile time, which
tags they want to recognise.

Signed-off-by: Thiago Macieira <thiago.macieira@intel.com>
diff --git a/src/cbor.h b/src/cbor.h
index 46e72e9..588cfdb 100644
--- a/src/cbor.h
+++ b/src/cbor.h
@@ -97,7 +97,7 @@
 
 typedef uint64_t CborTag;
 typedef enum CborKnownTags {
-    CborDateTimeStringTag          = 0,        /* RFC 3339 format: YYYY-MM-DD hh:mm:ss+zzzz */
+    CborDateTimeStringTag          = 0,
     CborUnixTime_tTag              = 1,
     CborPositiveBignumTag          = 2,
     CborNegativeBignumTag          = 3,
@@ -106,11 +106,12 @@
     CborExpectedBase64urlTag       = 21,
     CborExpectedBase64Tag          = 22,
     CborExpectedBase16Tag          = 23,
-    CborUriTag                     = 32,
+    CborEncodedCborTag             = 24,
+    CborUrlTag                     = 32,
     CborBase64urlTag               = 33,
     CborBase64Tag                  = 34,
     CborRegularExpressionTag       = 35,
-    CborMimeMessageTag             = 36,       /* RFC 2045-2047 */
+    CborMimeMessageTag             = 36,
     CborSignatureTag               = 55799
 } CborKnownTags;
 
diff --git a/src/cborvalidation.c b/src/cborvalidation.c
index 6b19c58..dbc5008 100644
--- a/src/cborvalidation.c
+++ b/src/cborvalidation.c
@@ -116,9 +116,115 @@
  *
  * \par
  * These are the tags known to the current TinyCBOR release:
-[will be added in the next commit]
+<table>
+  <tr>
+    <th>Tag</th>
+    <th>Data Item</th>
+    <th>Semantics</th>
+  </tr>
+  <tr>
+    <td>0</td>
+    <td>UTF-8 text string</td>
+    <td>Standard date/time string</td>
+  </tr>
+  <tr>
+    <td>1</td>
+    <td>integer</td>
+    <td>Epoch-based date/time</td>
+  </tr>
+  <tr>
+    <td>2</td>
+    <td>byte string</td>
+    <td>Positive bignum</td>
+  </tr>
+  <tr>
+    <td>3</td>
+    <td>byte string</td>
+    <td>Negative bignum</td>
+  </tr>
+  <tr>
+    <td>4</td>
+    <td>array</td>
+    <td>Decimal fraction</td>
+  </tr>
+  <tr>
+    <td>5</td>
+    <td>array</td>
+    <td>Bigfloat</td>
+  </tr>
+  <tr>
+    <td>21</td>
+    <td>byte string, array, map</td>
+    <td>Expected conversion to base64url encoding</td>
+  </tr>
+  <tr>
+    <td>22</td>
+    <td>byte string, array, map</td>
+    <td>Expected conversion to base64 encoding</td>
+  </tr>
+  <tr>
+    <td>23</td>
+    <td>byte string, array, map</td>
+    <td>Expected conversion to base16 encoding</td>
+  </tr>
+  <tr>
+    <td>24</td>
+    <td>byte string</td>
+    <td>Encoded CBOR data item</td>
+  </tr>
+  <tr>
+    <td>32</td>
+    <td>UTF-8 text string</td>
+    <td>URI</td>
+  </tr>
+  <tr>
+    <td>33</td>
+    <td>UTF-8 text string</td>
+    <td>base64url</td>
+  </tr>
+  <tr>
+    <td>34</td>
+    <td>UTF-8 text string</td>
+    <td>base64</td>
+  </tr>
+  <tr>
+    <td>35</td>
+    <td>UTF-8 text string</td>
+    <td>Regular expression</td>
+  </tr>
+  <tr>
+    <td>36</td>
+    <td>UTF-8 text string</td>
+    <td>MIME message</td>
+  </tr>
+  <tr>
+    <td>55799</td>
+    <td>any</td>
+    <td>Self-describe CBOR</td>
+  </tr>
+</table>
  */
 
+struct KnownTagData { uint32_t tag; }; /* this struct and the table below are printed by src/parsetags.pl */
+static const struct KnownTagData knownTagData[] = { /* ascending tag order: validate_tag() stops at the first larger entry */
+    { 0 },
+    { 1 },
+    { 2 },
+    { 3 },
+    { 4 },
+    { 5 },
+    { 21 },
+    { 22 },
+    { 23 },
+    { 24 },
+    { 32 },
+    { 33 },
+    { 34 },
+    { 35 },
+    { 36 },
+    { 55799 }
+};
+
 static CborError validate_value(CborValue *it, int flags, int recursionLeft);
 
 static inline CborError validate_simple_type(uint8_t simple_type, int flags)
@@ -135,20 +241,34 @@
 static inline CborError validate_tag(CborValue *it, CborTag tag, int flags, int recursionLeft)
 {
     CborType type = cbor_value_get_type(it);
+    const size_t knownTagCount = sizeof(knownTagData) / sizeof(knownTagData[0]);
+    const struct KnownTagData *tagData = knownTagData;
+    const struct KnownTagData * const knownTagDataEnd = knownTagData + knownTagCount;
+
     if (!recursionLeft)
         return CborErrorNestingTooDeep;
-
     if (flags & CborValidateNoTags)
         return CborErrorExcludedType;
-    if (flags & CborValidateNoUnknownTags) {
-        if (tag > 255 && (flags & CborValidateNoUnknownTagsSR) == 0)
+
+    /* find the tag data, if any */
+    for ( ; tagData != knownTagDataEnd; ++tagData) {
+        if (tagData->tag < tag)
+            continue;
+        if (tagData->tag > tag)
+            tagData = NULL;
+        break;
+    }
+    if (tagData == knownTagDataEnd)
+        tagData = NULL;
+
+    if (flags & CborValidateNoUnknownTags && !tagData) {
+        /* tag not found */
+        if (flags & CborValidateNoUnknownTagsSA && tag < 24)
             return CborErrorUnknownTag;
-        if (flags & CborValidateNoUnknownTagsSR) {
-            if (tag > 23 && (flags & CborValidateNoUnknownTagsSA) == 0)
-                return CborErrorUnknownTag;
-            if (flags & CborValidateNoUnknownTagsSA)
-                return CborErrorUnknownTag;
-        }
+        if ((flags & CborValidateNoUnknownTagsSR) == CborValidateNoUnknownTagsSR && tag < 256)
+            return CborErrorUnknownTag;
+        if ((flags & CborValidateNoUnknownTags) == CborValidateNoUnknownTags)
+            return CborErrorUnknownTag;
     }
 
     return validate_value(it, flags, recursionLeft);
diff --git a/src/parsetags.pl b/src/parsetags.pl
new file mode 100755
index 0000000..f704881
--- /dev/null
+++ b/src/parsetags.pl
@@ -0,0 +1,102 @@
+#!/usr/bin/perl -l
+## Copyright (C) 2017 Intel Corporation
+##
+## Permission is hereby granted, free of charge, to any person obtaining a copy
+## of this software and associated documentation files (the "Software"), to deal
+## in the Software without restriction, including without limitation the rights
+## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+## copies of the Software, and to permit persons to whom the Software is
+## furnished to do so, subject to the following conditions:
+##
+## The above copyright notice and this permission notice shall be included in
+## all copies or substantial portions of the Software.
+##
+## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+## THE SOFTWARE.
+##
+use strict;
+my $fname = shift @ARGV
+    or die("Usage: parsetags.pl tags.txt");
+open TAGS, "<", $fname
+    or die("Cannot open $fname: $!");
+
+# Description printed in the HTML listing for each type name in the tag list.
+my %typedescriptions = (
+    "Integer" => "integer",
+    "ByteString" => "byte string",
+    "TextString" => "UTF-8 text string",
+    "Array" => "array",
+    "Map" => "map",
+    "Tag" => "tag",     # shouldn't happen
+    "Simple" => "any simple type",
+    "Boolean" => "boolean",
+    "Null" => "null",
+    "Undefined" => "undefined",
+    "HalfFloat" => "IEEE 754 half-precision floating point",
+    "Float" => "IEEE 754 single-precision floating point",
+    "Double" => "IEEE 754 double-precision floating point"
+);
+
+# Read "number;id;type,type,...;semantics" records; "#" starts a comment.
+my %tags;
+while (<TAGS>) {
+    s/\s*#.*$//;
+    next if /^$/;
+    chomp;
+
+    die("Could not parse line \"$_\"")
+        unless /^(\d+);(\w+);([\w,]*);(.*)$/;
+    $tags{$1}{id} = $2;
+    $tags{$1}{semantic} = $4;
+    my @types = split(',', $3);
+    $tags{$1}{types} = \@types;
+}
+close TAGS or die;
+
+my @tagnumbers = sort { $a <=> $b } keys %tags;
+
+print "==== HTML listing ====";
+print "<table>\n  <tr>\n    <th>Tag</th>\n    <th>Data Item</th>\n    <th>Semantics</th>\n  </tr>";
+for my $n (@tagnumbers) {
+    print "  <tr>";
+    print "    <td>$n</td>";
+
+    # Refuse type names missing from %typedescriptions instead of printing blanks.
+    my @types = @{$tags{$n}{types}};
+    @types = map { $typedescriptions{$_} or die("Unknown type \"$_\" for tag $n"); } @types;
+    unshift @types, "any"
+        if (scalar @types == 0);
+    printf "    <td>%s</td>\n", join(', ', @types);
+    printf "    <td>%s</td>\n", $tags{$n}{semantic};
+    print "  </tr>";
+}
+print "</table>";
+
+print "\n==== enum listing for cbor.h ====\n";
+printf "typedef enum CborKnownTags {";
+my $comma = "";
+for my $n (@tagnumbers) {
+    printf "%s\n    Cbor%sTag%s = %d", $comma,
+        $tags{$n}{id},
+        ' ' x (23 - length($tags{$n}{id})),
+        $n;
+    $comma = ",";
+}
+print "\n} CborKnownTags;";
+
+# Emit the lookup table in ascending tag order (the order of @tagnumbers);
+# only the tag number is stored for each entry.
+print "\n==== search table ====\n";
+print "struct KnownTagData { uint32_t tag; };";
+printf "static const struct KnownTagData knownTagData[] = {";
+$comma = "";
+for my $n (@tagnumbers) {
+    printf "%s\n    { %d }", $comma, $n;
+    $comma = ",";
+}
+print "\n};";
diff --git a/src/tags.txt b/src/tags.txt
new file mode 100644
index 0000000..02eeff9
--- /dev/null
+++ b/src/tags.txt
@@ -0,0 +1,17 @@
+# Tag number; Tag ID; Applicable types (comma-separated); Semantics
+0;DateTimeString;TextString;Standard date/time string
+1;UnixTime_t;Integer;Epoch-based date/time
+2;PositiveBignum;ByteString;Positive bignum
+3;NegativeBignum;ByteString;Negative bignum
+4;Decimal;Array;Decimal fraction
+5;Bigfloat;Array;Bigfloat
+21;ExpectedBase64url;ByteString,Array,Map;Expected conversion to base64url encoding
+22;ExpectedBase64;ByteString,Array,Map;Expected conversion to base64 encoding
+23;ExpectedBase16;ByteString,Array,Map;Expected conversion to base16 encoding
+24;EncodedCbor;ByteString;Encoded CBOR data item
+32;Url;TextString;URI
+33;Base64url;TextString;base64url
+34;Base64;TextString;base64
+35;RegularExpression;TextString;Regular expression
+36;MimeMessage;TextString;MIME message
+55799;Signature;;Self-describe CBOR
diff --git a/tests/parser/tst_parser.cpp b/tests/parser/tst_parser.cpp
index 484d1b1..02801b8 100644
--- a/tests/parser/tst_parser.cpp
+++ b/tests/parser/tst_parser.cpp
@@ -1650,8 +1650,8 @@
     QTest::newRow("unknown-tag-256") << raw("\xd9\1\0\x60") << int(CborValidateNoUnknownTags) << CborErrorUnknownTag;
     QTest::newRow("unknown-tag-65536") << raw("\xda\0\1\0\0\x60") << int(CborValidateNoUnknownTags) << CborErrorUnknownTag;
     QTest::newRow("unknown-tag-4294967296") << raw("\xdb\0\0\0\1\0\0\0\0\x60") << int(CborValidateNoUnknownTags) << CborErrorUnknownTag;
-//    QTest::newRow("allowed-tag-31") << raw("\xd8\x1f\x60") << int(CborValidateNoUnknownTagsSA) << CborNoError;
-//    QTest::newRow("allowed-tag-256") << raw("\xd8\x1f\x60") << int(CborValidateNoUnknownTagsSR) << CborNoError;
+    QTest::newRow("allowed-tag-31") << raw("\xd8\x1f\x60") << int(CborValidateNoUnknownTagsSA) << CborNoError;
+    QTest::newRow("allowed-tag-256") << raw("\xd9\1\0\x60") << int(CborValidateNoUnknownTagsSR) << CborNoError;
 
     // excluded tags
     QTest::newRow("excluded-tag-0") << raw("\xc0\x60") << int(CborValidateNoTags) << CborErrorExcludedType;