blob: 8b3e45e76942d9818bfc347bb8d487510c8d3bb3 [file] [log] [blame]
Congbin Guoc4277582018-06-06 16:44:48 -07001# -*- coding: utf-8 -*-
2# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""This module provides utils to handle response of "Range Request"."""
6
7from __future__ import absolute_import
8from __future__ import division
9from __future__ import print_function
10
Congbin Guo3c6cc4b2018-06-14 17:45:10 -070011import collections
12import itertools
13import json
14import re
15
Congbin Guoc4277582018-06-06 16:44:48 -070016import constants
17
# Characters that delimit the fields of a Content-Range value such as
# 'bytes <start>-<end>/<total>': the space, the dash and the slash.
_RANGE_HEADER_SEPARATORS = re.compile('[-/ ]')

# The four fields obtained by splitting a Content-Range value on the
# separators above. All of them are kept as strings.
_ContentRangeHeader = collections.namedtuple(
    '_ContentRangeHeader',
    ['bytes', 'start', 'end', 'total'],
)
22
Congbin Guoc4277582018-06-06 16:44:48 -070023
class FormatError(Exception):
    """Raised when a response or header does not have the expected format."""
26
27
class NoFileFoundError(Exception):
    """Raised when no known file matches the byte range of a response."""
30
31
class ResponseQueueError(Exception):
    """Raised when a response is queued in a way that is not allowed."""
34
35
def _get_file_by_range_header(range_header_str, file_name_map):
    """Get file name and size by the Content-Range header.

    The format of Content-Range header is like:
        Content-Range: bytes <start>-<end>/<total>
    We get the <start> and <end> from it and retrieve the file name from
    |file_name_map|.

    Args:
        range_header_str: A string of range header, with or without the
            leading 'Content-Range:' field name.
        file_name_map: A dict of {(<start:str>, <size:int>): filename, ...}.

    Returns:
        A tuple of (filename, size), where size is <end> - <start> + 1.

    Raises:
        FormatError: Raised when the header does not consist of exactly the
            four expected fields, or <start>/<end> are not integers.
        NoFileFoundError: Raised when we cannot get a file matches the range.
    """
    # Drop the 'Content-Range:' field name first if it is present. Splitting
    # on ':' alone (instead of ': ') also copes with a missing or repeated
    # space after the colon, which used to escape as a bare IndexError or
    # TypeError.
    if range_header_str.lower().startswith('content-range:'):
        range_header_str = range_header_str.split(':', 1)[1].strip()

    try:
        range_header = _ContentRangeHeader._make(
            _RANGE_HEADER_SEPARATORS.split(range_header_str))
        size = int(range_header.end) - int(range_header.start) + 1
    except (TypeError, ValueError):
        # TypeError: namedtuple._make() got a wrong number of fields.
        # ValueError: <start> or <end> is not an integer (e.g. 'bytes */10').
        raise FormatError('Wrong format of content range header: %s' %
                          range_header_str)

    try:
        # The map is keyed by the start offset *as a string* plus the size.
        filename = file_name_map[(range_header.start, size)]
    except KeyError:
        raise NoFileFoundError('Cannot find a file matches the range %s' %
                               range_header_str)

    return filename, size
75
76
class JsonStreamer(object):
    """A class to stream the responses for range requests.

    The class accepts responses and formats the file content in all of them
    as a JSON stream. The format:
        '{"<filename>": "<content>", "<filename>": "<content>", ...}'
    """

    def __init__(self):
        # One iterator of (filename, content) pairs per queued response.
        self._files_iter_list = []
        # Turned off once a single-part response has been queued, because
        # nothing may be queued after it.
        self._can_add_more_response = True

    def queue_response(self, response, file_info_list):
        """Add a response to the queue to be streamed as JSON.

        We can add either:
            1. one and only one response for single-part range requests, or
            2. a series of responses for multi-part range requests.

        Args:
            response: An instance of requests.Response, which may be the
                response of a single range request, or a multi-part range
                request.
            file_info_list: A list of tarfile_utils.TarMemberInfo. We use it
                to look up file name by content start offset and size.

        Raises:
            FormatError: Raised when response to be queued isn't for a range
                request.
            NoFileFoundError: Raised when the range of a single-part response
                matches none of the files in |file_info_list|.
            ResponseQueueError: Raised when either queuing more than one
                response for single-part range request, or mixed responses
                for single-part and multi-part range request.
        """
        if not self._can_add_more_response:
            raise ResponseQueueError(
                'No more reponses can be added when there was a response for '
                'single-part range request in the queue!')

        file_name_map = {(f.content_start, int(f.size)): f.filename
                         for f in file_info_list}

        # Check if the response is for single range, or multi-part range. For
        # a single range request, the response must have header
        # 'Content-Range'. For a multi-part ranges request, the Content-Type
        # header must be like 'multipart/byteranges; ......'.
        content_range = response.headers.get('Content-Range')
        content_type = response.headers.get('Content-Type', '')

        if content_range:
            if self._files_iter_list:
                raise ResponseQueueError(
                    'Cannot queue more than one responses for single-part '
                    'range request, or mix responses for single-part and '
                    'multi-part.')
            filename, _ = _get_file_by_range_header(content_range,
                                                    file_name_map)
            self._files_iter_list = [iter([(filename, response.content)])]
            self._can_add_more_response = False

        elif content_type.startswith('multipart/byteranges;'):
            self._files_iter_list.append(
                iter(_FileIterator(response, file_name_map)))

        else:
            raise FormatError('The response is not for a range request.')

    def stream(self):
        """Yield the series of responses content as a JSON stream.

        Yields:
            Pieces of a JSON stream in the format described above. When no
            file is available at all, the single piece '{}' is yielded so
            that the output is still a valid JSON object.
        """
        files_iter = itertools.chain(*self._files_iter_list)
        encode = json.JSONEncoder().encode

        # The first entry is introduced by the opening brace, all following
        # entries by a comma. Looping (instead of calling next() up front)
        # keeps an empty queue from raising StopIteration inside this
        # generator, which is a RuntimeError since PEP 479.
        prefix = '{'
        for filename, content in files_iter:
            yield '%s%s: %s' % (prefix, encode(filename), encode(content))
            prefix = ', '

        # If nothing was emitted, the opening brace is still pending.
        yield '{}' if prefix == '{' else '}'
155
156
class _FileIterator(object):
    """The iterator of files in a response of multi-part range request.

    An example response is like:

        HTTP/1.1 206 Partial Content
        Content-Type: multipart/byteranges; boundary=magic_string
        Content-Length: 282

        --magic_string
        Content-Type: text/html
        Content-Range: bytes 0-50/1270

        <data>
        --magic_string
        Content-Type: text/html
        Content-Range: bytes 100-150/1270

        <data>
        --magic_string--

    In our application, each part is the content of a file. This class
    iterates the files.
    """

    def __init__(self, response, file_name_map):
        """Constructor.

        Args:
            response: An instance of requests.response.
            file_name_map: A dict of
                {(<start:str>, <size:int>): filename, ...}.
        """
        self._response_iter = response.iter_content(
            constants.READ_BUFFER_SIZE_BYTES)
        # The data fetched from the response but not consumed yet. None means
        # there is nothing (more) to read.
        self._chunk = None
        self._file_name_map = file_name_map

    def __iter__(self):
        # Prime the look-ahead buffer with the first chunk. An empty response
        # body leaves it as None, so parsing fails with FormatError later
        # rather than leaking StopIteration to whoever called iter().
        self._read_next_chunk()
        return self._iter_files()

    def _read_next_chunk(self):
        """Read the next chunk of data and return the current chunk.

        Returns:
            The chunk that was buffered before the call. The buffer is set to
            None when the response has no more data.
        """
        buffered = self._chunk
        self._chunk = next(self._response_iter, None)
        return buffered

    def _read_line(self):
        """Read one CRLF ended line from the response.

        Returns:
            The line read, without the trailing CRLF. If the response ends
            before a CRLF is seen, whatever was read so far is returned.
            Return None if nothing to read.
        """
        if self._chunk is None:
            return None

        buffered = ''
        while True:
            buffered += self._chunk
            parts = buffered.split('\r\n', 1)
            if len(parts) == 2:
                # Keep whatever follows the CRLF for the next read.
                line, self._chunk = parts
                return line

            # No '\r\n' in the data read so far. Read one more chunk.
            self._read_next_chunk()
            if self._chunk is None:
                return buffered

    def _read_bytes(self, max_bytes):
        """Read at most |max_bytes| bytes from the response.

        Args:
            max_bytes: An integer of maximum number of bytes to read.

        Returns:
            The bytes read, which are fewer than |max_bytes| if the response
            ends early. Return None if nothing to read.
        """
        if self._chunk is None:
            return None

        # Collect the pieces and join them once at the end instead of growing
        # a string chunk by chunk.
        pieces = []
        bytes_remaining = max_bytes
        while True:
            bytes_remaining -= len(self._chunk)
            if bytes_remaining < 0:
                # The current chunk holds more than we need: |bytes_remaining|
                # is now minus the number of surplus bytes, so slicing with
                # it splits the chunk into the wanted head and the unread
                # tail.
                pieces.append(self._chunk[:bytes_remaining])
                self._chunk = self._chunk[bytes_remaining:]
                break

            pieces.append(self._read_next_chunk())
            if self._chunk is None:
                break

        return ''.join(pieces)

    def _read_empty_line(self):
        """Read one line and check that it is empty.

        Raises:
            FormatError: Raised when there is nothing to read, or the line
                read is not empty.
        """
        line = self._read_line()
        if line is None:
            raise FormatError('Expect an empty line, but got EOF.')

        if line:
            raise FormatError('Expect an empty line, but got "%s".' % line)

    def _iter_files(self):
        """Iterate the files in the response.

        Yields:
            A pair of (name, content) of the file.

        Raises:
            FormatError: Raised when response content interrupted.
            NoFileFoundError: Raised when the range of a part matches no file
                in the file name map.
        """
        self._read_empty_line()  # The first line is empty.
        while True:
            self._read_line()  # The second line is the boundary.
            self._read_line()  # The line of sub content type.
            sub_range_header = self._read_line()  # The sub content range.
            if sub_range_header is None:
                # Nothing left after the closing boundary.
                break
            self._read_empty_line()  # Another empty line.

            filename, size = _get_file_by_range_header(sub_range_header,
                                                       self._file_name_map)
            content = self._read_bytes(size)

            # Check the size before consuming the trailing CRLF, so that a
            # truncated part is reported with its file name and byte counts
            # rather than as a missing empty line.
            bytes_read = 0 if content is None else len(content)
            if bytes_read != size:
                raise FormatError(
                    '%s: Error in reading content (read %d B, expect %d B)' %
                    (filename, bytes_read, size)
                )

            self._read_empty_line()  # Every content has a trailing '\r\n'.

            yield filename, content