Add support for parsing chunked strings This is the last remainng parsing API. It provides a pair of functions that extract either a chunk from a byte string or from a text string. They operate also in non-chunked strings and simulates a one-chunk string, so code can be generic. With it, it's possible to support zero-copy parsing of both types of strings with TinyCBOR. Signed-off-by: Thiago Macieira <thiago.macieira@intel.com>

commit: 51b5606a0f5bcc48e07629df8dfabbf37cdb7da8 [log] [tgz]
author: Thiago Macieira <thiago.macieira@intel.com> Thu Feb 23 16:03:13 2017 -0800
committer: Thiago Macieira <thiago.macieira@intel.com> Wed Mar 15 18:31:52 2017 -0700
tree: 3f93f482b648ad01b00c9a883fef48011409441a
parent: da8e35ef3fac04574ee9a945e78328705ddbf668 [diff] [blame]
diff --git a/src/cborparser.c b/src/cborparser.c
index ae10c0e..01476ba 100644
--- a/src/cborparser.c
+++ b/src/cborparser.c

@@ -1,6 +1,6 @@
 /****************************************************************************
 **
-** Copyright (C) 2016 Intel Corporation
+** Copyright (C) 2017 Intel Corporation
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a copy
 ** of this software and associated documentation files (the "Software"), to deal
@@ -932,7 +932,7 @@
  * CborErrorDataTooLarge if the stream indicates a length that is too big to
  * fit in 32-bit.
  *
- * \sa cbor_value_get_string_length(), cbor_value_copy_string(), cbor_value_is_length_known()
+ * \sa cbor_value_get_string_length(), cbor_value_copy_text_string(), cbor_value_copy_byte_string(), cbor_value_is_length_known()
  */
 CborError cbor_value_calculate_string_length(const CborValue *value, size_t *len)
 {
@@ -940,6 +940,146 @@
     return _cbor_value_copy_string(value, NULL, len, NULL);
 }
 
+static CborError get_string_chunk(CborValue *it, const void **bufferptr, size_t *len)
+{
+    CborError err;
+
+    /* Possible states:
+     * length known | iterating | meaning
+     *     no       |    no     | before the first chunk of a chunked string
+     *     yes      |    no     | at a non-chunked string
+     *     no       |    yes    | second or later chunk
+     *     yes      |    yes    | after a non-chunked string
+     */
+    if (it->flags & CborIteratorFlag_IteratingStringChunks) {
+        /* already iterating */
+        if (cbor_value_is_length_known(it)) {
+            /* if the length was known, it wasn't chunked, so finish iteration */
+            goto last_chunk;
+        }
+    } else if (!cbor_value_is_length_known(it)) {
+        /* chunked string, we're before the first chunk */
+        ++it->ptr;
+    }
+
+    /* are we at the end? */
+    if (it->ptr == it->parser->end)
+        return CborErrorUnexpectedEOF;
+
+    if (*it->ptr == BreakByte) {
+        /* last chunk */
+        ++it->ptr;
+last_chunk:
+        *bufferptr = NULL;
+        return preparse_next_value(it);
+    } else if ((uint8_t)(*it->ptr & MajorTypeMask) == it->type) {
+        err = extract_length(it->parser, &it->ptr, len);
+        if (err)
+            return err;
+        if (*len > (size_t)(it->parser->end - it->ptr))
+            return CborErrorUnexpectedEOF;
+
+        *bufferptr = it->ptr;
+        it->ptr += *len;
+    } else {
+        return CborErrorIllegalType;
+    }
+
+    it->flags |= CborIteratorFlag_IteratingStringChunks;
+    return CborNoError;
+}
+
+/**
+ * \fn CborError cbor_value_get_text_string_chunk(const CborValue *value, const char **bufferptr, size_t *len, CborValue *next)
+ *
+ * Extracts one text string chunk pointed to by \a value and stores a pointer
+ * to the data in \a buffer and the size in \a len, which must not be null. If
+ * no more chunks are available, then \a bufferptr will be set to null. This
+ * function may be used to iterate over any string without causing its contents
+ * to be copied to a separate buffer, like the convenience function
+ * cbor_value_copy_text_string() does.
+ *
+ * It is designed to be used in code like:
+ *
+ * \code
+ *   if (cbor_value_is_text_string(value)) {
+ *       char *ptr;
+ *       size_t len;
+ *       while (1) {
+ *           err = cbor_value_get_text_string_chunk(value, &ptr, &len, &value));
+ *           if (err) return err;
+ *           if (ptr == NULL) return CborNoError;
+ *           consume(ptr, len);
+ *       }
+ *   }
+ * \endcode
+ *
+ * If the iterator \a value does not point to a text string, the behaviour is
+ * undefined, so checking with \ref cbor_value_get_type or \ref
+ * cbor_value_is_text_string is recommended.
+ *
+ * The \a next pointer, if not null, will be updated to point to the next item
+ * after this string. During iteration, the pointer must only be passed back
+ * again to this function; passing it to any other function in this library
+ * results in undefined behavior. If there are no more chunks to be read from
+ * \a value, then \a next will be set to the next item after this string; if \a
+ * value points to the last item, then \a next will be invalid.
+ *
+ * \note This function does not perform UTF-8 validation on the incoming text
+ * string.
+ *
+ * \sa cbor_value_dup_text_string(), cbor_value_copy_text_string(), cbor_value_caculate_string_length(), cbor_value_get_byte_string_chunk()
+ */
+
+/**
+ * \fn CborError cbor_value_get_byte_string_chunk(const CborValue *value, const char **bufferptr, size_t *len, CborValue *next)
+ *
+ * Extracts one byte string chunk pointed to by \a value and stores a pointer
+ * to the data in \a buffer and the size in \a len, which must not be null. If
+ * no more chunks are available, then \a bufferptr will be set to null. This
+ * function may be used to iterate over any string without causing its contents
+ * to be copied to a separate buffer, like the convenience function
+ * cbor_value_copy_byte_string() does.
+ *
+ * It is designed to be used in code like:
+ *
+ * \code
+ *   if (cbor_value_is_byte_string(value)) {
+ *       char *ptr;
+ *       size_t len;
+ *       while (1) {
+ *           err = cbor_value_get_byte_string_chunk(value, &ptr, &len, &value));
+ *           if (err) return err;
+ *           if (ptr == NULL) return CborNoError;
+ *           consume(ptr, len);
+ *       }
+ *   }
+ * \endcode
+ *
+ * If the iterator \a value does not point to a byte string, the behaviour is
+ * undefined, so checking with \ref cbor_value_get_type or \ref
+ * cbor_value_is_byte_string is recommended.
+ *
+ * The \a next pointer, if not null, will be updated to point to the next item
+ * after this string. During iteration, the pointer must only be passed back
+ * again to this function; passing it to any other function in this library
+ * results in undefined behavior. If there are no more chunks to be read from
+ * \a value, then \a next will be set to the next item after this string; if \a
+ * value points to the last item, then \a next will be invalid.
+ *
+ * \sa cbor_value_dup_byte_string(), cbor_value_copy_byte_string(), cbor_value_caculate_string_length(), cbor_value_get_text_string_chunk()
+ */
+
+CborError _cbor_value_get_string_chunk(const CborValue *value, const void **bufferptr,
+                                                        size_t *len, CborValue *next)
+{
+    CborValue tmp;
+    if (!next)
+        next = &tmp;
+    *next = *value;
+    return get_string_chunk(next, bufferptr, len);
+}
+
 /* We return uintptr_t so that we can pass memcpy directly as the iteration
  * function. The choice is to optimize for memcpy, which is used in the base
  * parser API (cbor_value_copy_string), while memcmp is used in convenience API
@@ -1066,7 +1206,7 @@
  * \note This function does not perform UTF-8 validation on the incoming text
  * string.
  *
- * \sa cbor_value_dup_text_string(), cbor_value_copy_byte_string(), cbor_value_get_string_length(), cbor_value_calculate_string_length()
+ * \sa cbor_value_get_text_string_chunk() cbor_value_dup_text_string(), cbor_value_copy_byte_string(), cbor_value_get_string_length(), cbor_value_calculate_string_length()
  */
 
 /**
@@ -1097,7 +1237,7 @@
  * This function may not run in constant time (it will run in O(n) time on the
  * number of chunks). It requires constant memory (O(1)).
  *
- * \sa cbor_value_dup_text_string(), cbor_value_copy_text_string(), cbor_value_get_string_length(), cbor_value_calculate_string_length()
+ * \sa cbor_value_get_byte_string_chunk(), cbor_value_dup_text_string(), cbor_value_copy_text_string(), cbor_value_get_string_length(), cbor_value_calculate_string_length()
  */
 
 CborError _cbor_value_copy_string(const CborValue *value, void *buffer,
commit	51b5606a0f5bcc48e07629df8dfabbf37cdb7da8	[log] [tgz]
author	Thiago Macieira <thiago.macieira@intel.com>	Thu Feb 23 16:03:13 2017 -0800
committer	Thiago Macieira <thiago.macieira@intel.com>	Wed Mar 15 18:31:52 2017 -0700
tree	3f93f482b648ad01b00c9a883fef48011409441a
parent	da8e35ef3fac04574ee9a945e78328705ddbf668 [diff] [blame]