Congbin Guo | c427758 | 2018-06-06 16:44:48 -0700 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
| 2 | # Copyright 2018 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | """This module provides utils to handle response of "Range Request".""" |
| 6 | |
| 7 | from __future__ import absolute_import |
| 8 | from __future__ import division |
| 9 | from __future__ import print_function |
| 10 | |
| 11 | import constants |
| 12 | |
| 13 | |
class FormatError(Exception):
  """Raised when a range response does not follow the expected layout.

  The parser in this module raises it for a missing empty line, a malformed
  "Content-Range" part header, or a part whose content is shorter than the
  range announced by its header.
  """
| 16 | |
| 17 | |
class NoFileFoundError(Exception):
  """Raised when no known file matches the byte range of a response part.

  The (start offset, size) pair parsed from a part's "Content-Range" header
  is looked up among the files the caller supplied; this error reports a
  failed lookup.
  """
| 20 | |
| 21 | |
class FileIterator(object):
  """The iterator of files in a response of multipart range request.

  An example response is like:

    HTTP/1.1 206 Partial Content
    Content-Type: multipart/byteranges; boundary=magic_string
    Content-Length: 282

    --magic_string
    Content-Type: text/html
    Content-Range: bytes 0-50/1270

    <data>
    --magic_string
    Content-Type: text/html
    Content-Range: bytes 100-150/1270

    <data>
    --magic_string--

  In our application, each part is the content of a file. Iterating an
  instance yields one (filename, content) pair per part.

  The response body is consumed chunk by chunk. Chunks may be byte strings
  or text strings; line and content buffers always have the type of the
  chunks, so content is yielded in the same type it was received in.
  """

  def __init__(self, response, file_info_list):
    """Constructor.

    Args:
      response: An instance of requests.response. Only its
        iter_content(chunk_size) method is used.
      file_info_list: A list of tarfile_utils.TarMemberInfo. We use it to look
        up file name by content start offset and size.
    """
    # Iterator over the raw chunks of the response body.
    self._response_iter = response.iter_content(
        constants.READ_BUFFER_SIZE_BYTES)
    # The chunk (or unread tail of a chunk) currently being parsed. None
    # means nothing has been read yet or the response is exhausted.
    self._chunk = None
    # Maps (content start offset, size) to file name. The offset is
    # normalized to an integer so that it compares equal to the integer
    # parsed from a "Content-Range" header no matter whether the file info
    # stores it as a number or as a decimal string.
    self._file_name_map = {
        (self._normalize_offset(f.content_start), int(f.size)): f.filename
        for f in file_info_list}

  def __iter__(self):
    """Read the first chunk and return a generator of (name, content).

    Each call pulls a new chunk from the response and replaces whatever was
    buffered before, so an instance is meant to be iterated once.
    """
    # Use a default instead of letting StopIteration escape: for an empty
    # response the generator then reports a FormatError about the EOF.
    self._chunk = next(self._response_iter, None)
    return self._iter_files()

  @staticmethod
  def _normalize_offset(value):
    """Return |value| as an integer when possible, else unchanged."""
    try:
      return int(value)
    except (TypeError, ValueError):
      return value

  @staticmethod
  def _to_text(line):
    """Return |line| as a text string so it can be parsed with str methods.

    Only byte strings that are not already the native str type (i.e. bytes
    on Python 3) are decoded; undecodable bytes are replaced so that a
    garbled header ends up as a FormatError rather than a decoding error.
    """
    if isinstance(line, bytes) and not isinstance(line, str):
      return line.decode('utf-8', 'replace')
    return line

  def _read_next_chunk(self):
    """Helper function to read next chunk of data and return current chunk."""
    buffered = self._chunk
    # None marks the end of the response.
    self._chunk = next(self._response_iter, None)
    return buffered

  def _read_line(self):
    """Read one CRLF ended line from the response.

    Returns:
      The line read, without the CRLF. If the response ends before a CRLF is
      found, whatever was buffered is returned. Return None if nothing to
      read.
    """
    if self._chunk is None:
      return None

    # Match the separator and the buffer to the type of the chunks.
    crlf = b'\r\n' if isinstance(self._chunk, bytes) else u'\r\n'
    buffered = self._chunk[:0]
    while True:
      buffered += self._chunk
      # Search the accumulated buffer, not just the newest chunk, so that a
      # CRLF split across two chunks is still found.
      line, separator, rest = buffered.partition(crlf)
      if separator:
        self._chunk = rest
        return line

      # No CRLF so far. The current chunk is already in |buffered|, so just
      # advance to the next one.
      self._read_next_chunk()
      if self._chunk is None:
        return buffered

  def _read_bytes(self, max_bytes):
    """Read at most |max_bytes| bytes from the response.

    Args:
      max_bytes: An integer of maximum number of bytes to read.

    Returns:
      The bytes read; fewer than |max_bytes| only if the response ended.
      Return None if nothing to read.
    """
    if self._chunk is None:
      return None

    empty = self._chunk[:0]
    pieces = []
    bytes_remaining = max_bytes
    while True:
      bytes_remaining -= len(self._chunk)
      if bytes_remaining < 0:
        # The chunk holds more than we need: -bytes_remaining is the number
        # of surplus bytes at its end. Take the head, keep the surplus.
        pieces.append(self._chunk[:bytes_remaining])
        self._chunk = self._chunk[bytes_remaining:]
        break

      pieces.append(self._read_next_chunk())
      if self._chunk is None:
        break

    return empty.join(pieces)

  def _read_empty_line(self):
    """Read one line and assert it is empty.

    Raises:
      FormatError: Raised when the response ended or the line is not empty.
    """
    line = self._read_line()
    if line is None:
      raise FormatError('Expect an empty line, but got EOF.')

    if line:
      raise FormatError('Expect an empty line, but got "%s".' % line)

  def _iter_files(self):
    """Iterate the files in the response.

    Yields:
      A pair of (name, content) of the file.

    Raises:
      FormatError: Raised when response content interrupted.
      NoFileFoundError: Raised when we cannot get a file matches the range.
    """
    self._read_empty_line()  # The first line is empty.
    while True:
      self._read_line()  # The second line is the boundary.
      self._read_line()  # The line sub content type.
      sub_range_header = self._read_line()  # The line of sub content range.
      if sub_range_header is None:
        # Nothing left after the closing boundary: all parts were read.
        break
      self._read_empty_line()  # Another empty line.

      # The header format is: "Content-Range: bytes START-END/TOTAL"
      sub_range_header = self._to_text(sub_range_header)
      try:
        start, end = sub_range_header.split(' ')[2].split('/')[0].split('-')
        start = int(start)
        size = int(end) - start + 1
      except (IndexError, ValueError):
        raise FormatError('Wrong format of sub content range header: %s' %
                          sub_range_header)
      try:
        filename = self._file_name_map[(start, size)]
      except KeyError:
        raise NoFileFoundError('Cannot find a file matches the range %s' %
                               sub_range_header)

      content = self._read_bytes(size)

      # Check the size before the trailing line: a short read only happens
      # when the response ended, and then the trailing-line check would fail
      # first with a less specific message.
      bytes_read = 0 if content is None else len(content)
      if bytes_read != size:
        raise FormatError(
            '%s: Error in reading content (read %d B, expect %d B)' %
            (filename, bytes_read, size)
        )

      self._read_empty_line()  # Every content has a trailing '\r\n'.

      yield filename, content