blob: 8883003821ed34f96af29d325b7e6846a160525c [file] [log] [blame]
Congbin Guoc4277582018-06-06 16:44:48 -07001# -*- coding: utf-8 -*-
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""This module provides utils to handle the response of a "Range Request"."""
6
7from __future__ import absolute_import
8from __future__ import division
9from __future__ import print_function
10
11import constants
12
13
class FormatError(Exception):
  """Raised when a response does not follow the expected format."""
16
17
class NoFileFoundError(Exception):
  """Raised when no known file matches the byte range of a response part."""
20
21
class FileIterator(object):
  """The iterator of files in a response of multipart range request.

  An example response is like:

    HTTP/1.1 206 Partial Content
    Content-Type: multipart/byteranges; boundary=magic_string
    Content-Length: 282

    --magic_string
    Content-Type: text/html
    Content-Range: bytes 0-50/1270

    <data>
    --magic_string
    Content-Type: text/html
    Content-Range: bytes 100-150/1270

    <data>
    --magic_string--

  In our application, each part is the content of a file. This class iterates
  the files, yielding a (name, content) pair for every part.

  The chunks produced by the response may be byte strings or text. Header
  lines are handled as native strings; file content is returned in the same
  type as the chunks it was read from.
  """

  def __init__(self, response, file_info_list):
    """Constructor.

    Args:
      response: An instance of requests.response. Its body is consumed through
        iter_content() in pieces of constants.READ_BUFFER_SIZE_BYTES.
      file_info_list: A list of tarfile_utils.TarMemberInfo. We use it to look
        up file name by content start offset and size.
    """
    self._response_iter = response.iter_content(
        constants.READ_BUFFER_SIZE_BYTES)
    # One chunk of look-ahead; None means the response is exhausted (or that
    # iteration has not started yet).
    self._chunk = None
    # Both key fields are normalized to int so that the lookup does not depend
    # on whether the offsets were supplied as numbers or as numeric strings.
    self._file_name_map = {(int(f.content_start), int(f.size)): f.filename
                           for f in file_info_list}

  def __iter__(self):
    # Prime the look-ahead chunk. An empty body leaves it as None, which
    # _iter_files() reports as FormatError on the first read, instead of
    # letting StopIteration escape from iter().
    self._chunk = next(self._response_iter, None)
    return self._iter_files()

  @staticmethod
  def _line_break(sample):
    """Return the CRLF separator in the same string type as |sample|."""
    return b'\r\n' if isinstance(sample, bytes) else u'\r\n'

  @staticmethod
  def _to_text(data):
    """Return |data| as a native string, decoding it if it is raw bytes."""
    # On Python 2 bytes is str, so nothing is decoded there. latin-1 maps
    # every byte to one character, so decoding can never fail.
    if isinstance(data, bytes) and not isinstance(data, str):
      return data.decode('latin-1')
    return data

  def _read_next_chunk(self):
    """Helper function to read next chunk of data and return current chunk."""
    current = self._chunk
    self._chunk = next(self._response_iter, None)
    return current

  def _read_line(self):
    """Read one CRLF ended line from the response.

    Returns:
      The line read (without the CRLF) as a native string. Return None if
      nothing to read, i.e. the response is exhausted and no data is left.
    """
    if self._chunk is None:
      return None

    separator = self._line_break(self._chunk)
    buffered = self._chunk[:0]  # Empty string of the same type as the chunks.
    while True:
      buffered += self._chunk
      line, found, rest = buffered.partition(separator)
      if found:
        # Whatever follows the CRLF stays available for the next read.
        self._chunk = rest
        return self._to_text(line)

      # No CRLF seen so far (it may straddle two chunks). Read one more.
      self._read_next_chunk()
      if self._chunk is None:
        # EOF: hand back the unterminated tail, or None when there is none.
        return self._to_text(buffered) if buffered else None

  def _read_bytes(self, max_bytes):
    """Read at most |max_bytes| bytes from the response.

    Args:
      max_bytes: An integer of maximum number of bytes to read.

    Returns:
      The bytes read, in the same type as the response chunks. Return None if
      nothing to read.
    """
    if self._chunk is None:
      return None

    pieces = []
    bytes_remaining = max_bytes
    while True:
      bytes_remaining -= len(self._chunk)
      if bytes_remaining < 0:
        # The chunk holds more than requested: |bytes_remaining| is now the
        # negative count of surplus bytes, so it splits the chunk into the
        # part we take and the part left for the next read.
        pieces.append(self._chunk[:bytes_remaining])
        self._chunk = self._chunk[bytes_remaining:]
        break

      pieces.append(self._read_next_chunk())
      if self._chunk is None:
        break

    # |pieces| is never empty here; join with an empty string of its type.
    return pieces[0][:0].join(pieces)

  def _read_empty_line(self):
    """Read one line and assert it is empty.

    Raises:
      FormatError: Raised when the response is exhausted or the line read is
        not empty.
    """
    line = self._read_line()
    if line is None:
      raise FormatError('Expect an empty line, but got EOF.')

    if line:
      raise FormatError('Expect an empty line, but got "%s".' % line)

  def _iter_files(self):
    """Iterate the files in the response.

    Yields:
      A pair of (name, content) of the file.

    Raises:
      FormatError: Raised when response content interrupted.
      NoFileFoundError: Raised when we cannot get a file matches the range.
    """
    self._read_empty_line()  # The first line is empty.
    while True:
      self._read_line()  # The boundary line.
      self._read_line()  # The line of sub content type.
      sub_range_header = self._read_line()  # The line of sub content range.
      if sub_range_header is None:
        # Nothing follows the closing boundary, so we are done.
        break
      self._read_empty_line()  # Headers and content are split by empty line.

      # The header format is: "Content-Range: bytes START-END/TOTAL"
      try:
        start, end = sub_range_header.split(' ')[2].split('/')[0].split('-')
        start = int(start)
        size = int(end) - start + 1
      except (IndexError, ValueError):
        raise FormatError('Wrong format of sub content range header: %s' %
                          sub_range_header)
      try:
        filename = self._file_name_map[(start, size)]
      except KeyError:
        raise NoFileFoundError('Cannot find a file matches the range %s' %
                               sub_range_header)

      content = self._read_bytes(size)
      # Check the length before the trailing CRLF so that truncated content is
      # reported with the file name and byte counts, not as a generic EOF.
      bytes_read = 0 if content is None else len(content)
      if bytes_read != size:
        raise FormatError(
            '%s: Error in reading content (read %d B, expect %d B)' %
            (filename, bytes_read, size)
        )
      self._read_empty_line()  # Every content has a trailing '\r\n'.

      yield filename, content