GS Cache: handle response of multipart range request.
Internally, we use multi-range requests to extract files from a tar, e.g.
curl http://gs_cache/download/archive.tar \
-H 'Range: bytes=<file1_start>-<file1_end>,<file2_start>-<file2_end>'
The response is like:
HTTP/1.1 206 Partial Content
Content-Type: multipart/byteranges; boundary=magic_string
Content-Length: 282
--magic_string
Content-Type: text/html
Content-Range: bytes 0-50/1270
<data>
--magic_string
Content-Type: text/html
Content-Range: bytes 100-150/1270
<data>
--magic_string--
This change provides utils to convert such a response into a series of
(filename, file_content) tuples.
BUG=chromium:824580
TEST=Ran unit tests.
Change-Id: Ib6935aefbad17b76c378b268699af07723b25acf
Reviewed-on: https://chromium-review.googlesource.com/1090074
Commit-Ready: ChromeOS CL Exonerator Bot <chromiumos-cl-exonerator@appspot.gserviceaccount.com>
Tested-by: Congbin Guo <guocb@chromium.org>
Reviewed-by: Congbin Guo <guocb@chromium.org>
diff --git a/gs_cache/range_response.py b/gs_cache/range_response.py
new file mode 100644
index 0000000..8883003
--- /dev/null
+++ b/gs_cache/range_response.py
@@ -0,0 +1,172 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""This module provides utils to handle response of "Range Request"."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import constants
+
+
+class FormatError(Exception):
+  """Exception raised when the response does not have the expected format."""
+
+
+class NoFileFoundError(Exception):
+  """Exception raised when no file matches the range of a response part."""
+
+
+class FileIterator(object):
+  """Iterates over the files carried by a multipart range response.
+
+  An example response looks like:
+
+    HTTP/1.1 206 Partial Content
+    Content-Type: multipart/byteranges; boundary=magic_string
+    Content-Length: 282
+
+    --magic_string
+    Content-Type: text/html
+    Content-Range: bytes 0-50/1270
+
+    <data>
+    --magic_string
+    Content-Type: text/html
+    Content-Range: bytes 100-150/1270
+
+    <data>
+    --magic_string--
+
+  Each part holds the content of one file. Iterating an instance yields a
+  (filename, content) pair for every part, in response order.
+  """
+
+  def __init__(self, response, file_info_list):
+    """Constructor.
+
+    Args:
+      response: A requests.Response object; its body is read lazily in chunks.
+      file_info_list: A list of tarfile_utils.TarMemberInfo, used to look up a
+          file name by its (content_start, size) pair.
+    """
+    self._response_iter = response.iter_content(
+        constants.READ_BUFFER_SIZE_BYTES)
+    self._chunk = None
+    self._file_name_map = {(f.content_start, int(f.size)): f.filename
+                           for f in file_info_list}
+
+  def __iter__(self):
+    self._chunk = next(self._response_iter, None)  # None if body is empty.
+    return self._iter_files()
+
+  def _read_next_chunk(self):
+    """Advances to the next chunk; returns the chunk that was current."""
+    previous = self._chunk
+    try:
+      self._chunk = next(self._response_iter)
+    except StopIteration:
+      self._chunk = None  # Marks the end of the response.
+
+    return previous
+
+  def _read_line(self):
+    """Reads one CRLF-terminated line from the response.
+
+    Returns:
+      The line without its CRLF; at EOF, whatever text was left. None if the
+      response was already exhausted.
+    """
+    if self._chunk is None:
+      return None
+
+    buffered = ''
+    while True:
+      buffered += self._chunk
+      line, crlf, rest = buffered.partition('\r\n')
+      if crlf:
+        self._chunk = rest  # Keep what follows the line for later reads.
+        return line
+      self._read_next_chunk()  # No CRLF yet; it may straddle two chunks.
+      if self._chunk is None:
+        return buffered
+
+  def _read_bytes(self, max_bytes):
+    """Reads at most |max_bytes| bytes from the response.
+
+    Args:
+      max_bytes: An integer, the maximum number of bytes to read.
+
+    Returns:
+      The bytes read (fewer than |max_bytes| at EOF). None if nothing to read.
+    """
+    if self._chunk is None:
+      return None
+
+    buffered = ''
+    bytes_remaining = max_bytes
+    while True:
+      bytes_remaining -= len(self._chunk)
+      if bytes_remaining < 0:
+        # The chunk holds more than we need: split it at the (negative) index.
+        buffered += self._chunk[:bytes_remaining]
+        self._chunk = self._chunk[bytes_remaining:]
+        return buffered
+      buffered += self._read_next_chunk()
+      if self._chunk is None:
+        return buffered
+
+  def _read_empty_line(self):
+    """Reads one line and raises FormatError unless it is empty."""
+    line = self._read_line()
+    if line is None:
+      raise FormatError('Expect an empty line, but got EOF.')
+
+    if line:
+      raise FormatError('Expect an empty line, but got "%s".' % line)
+
+  def _iter_files(self):
+    """Iterates the files in the response.
+
+    Yields:
+      A pair of (name, content) of the file.
+
+    Raises:
+      FormatError: Raised when the response is malformed or truncated.
+      NoFileFoundError: Raised when no file matches the range of a part.
+    """
+    self._read_empty_line()  # The body starts with an empty line.
+    while True:
+      self._read_line()  # The boundary line.
+      self._read_line()  # The "Content-Type" line of the part.
+      sub_range_header = self._read_line()  # The "Content-Range" line.
+      if sub_range_header is None:
+        break  # EOF: nothing left to read after the last boundary.
+      self._read_empty_line()  # An empty line ends the part headers.
+
+      # The header format is: "Content-Range: bytes START-END/TOTAL"
+      try:
+        start, end = sub_range_header.split(' ')[2].split('/')[0].split('-')
+        size = int(end) - int(start) + 1
+      except (IndexError, ValueError):
+        raise FormatError('Wrong format of sub content range header: %s' %
+                          sub_range_header)
+      try:
+        filename = self._file_name_map[(start, size)]  # |start| is a str.
+      except KeyError:
+        raise NoFileFoundError('Cannot find a file matches the range %s' %
+                               sub_range_header)
+
+      content = self._read_bytes(size)
+      # Check the size before the trailing CRLF so that truncated content is
+      # reported as such instead of as a missing empty line.
+      bytes_read = 0 if content is None else len(content)
+      if bytes_read != size:
+        raise FormatError(
+            '%s: Error in reading content (read %d B, expect %d B)' %
+            (filename, bytes_read, size))
+
+      self._read_empty_line()  # Every content has a trailing '\r\n'.
+      yield filename, content