Blame - gs_cache/range_response.py - chromium.googlesource.com/chromiumos/platform/dev-util

blob: c8c4daca6aad70ebd06cbb085671e8bf15922e40 [file] [log] [blame]

Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	1	# -- coding: utf-8 --
				2	# Copyright 2018 The Chromium OS Authors. All rights reserved.
				3	# Use of this source code is governed by a BSD-style license that can be
				4	# found in the LICENSE file.
				5	"""This module provides utils to handle response of "Range Request"."""
				6
				7	from __future__ import absolute_import
				8	from __future__ import division
				9	from __future__ import print_function
				10
Congbin Guo	3c6cc4b	2018-06-14 17:45:10 -0700	[diff] [blame]	11	import collections
				12	import itertools
				13	import json
				14	import re
				15
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	16	import constants
				17
Congbin Guo	3c6cc4b	2018-06-14 17:45:10 -0700	[diff] [blame]	18	_RANGE_HEADER_SEPARATORS = re.compile('[-/ ]')
Congbin Guo	52f7cd0	2018-06-20 13:12:36 -0700	[diff] [blame]	19	_ONE_LINE = object() # Special object to indicate data reader to read one line.
Congbin Guo	3c6cc4b	2018-06-14 17:45:10 -0700	[diff] [blame]	20
				21	_ContentRangeHeader = collections.namedtuple('_ContentRangeHeader',
				22	('bytes', 'start', 'end', 'total'))
				23
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	24
				25	class FormatError(Exception):
				26	"""Exception raised when we parse wrong format of response."""
				27
				28
				29	class NoFileFoundError(Exception):
				30	"""Exception raised when we cannot get a file match the range."""
				31
				32
class ResponseQueueError(Exception):
  """Raised when a response is queued while queuing is not allowed."""
				35
				36
				37	def _get_file_by_range_header(range_header_str, file_name_map):
				38	"""Get file name and size by the Content-Range header.
				39
				40	The format of Content-Range header is like:
				41	Content-Range: bytes <start>-<end>/<total>
				42	We get the <start> and <end> from it and retrieve the file name from
				43	\|file_name_map\|.
				44
				45	Args:
				46	range_header_str: A string of range header.
				47	file_name_map: A dict of {(<start:str>, <size:int>): filename, ...}.
				48
				49	Returns:
				50	A tuple of (filename, size).
				51
				52	Raises:
				53	FormatError: Raised when response content interrupted.
				54	NoFileFoundError: Raised when we cannot get a file matches the range.
				55	"""
				56	# Split the part of 'Content-Range:' first if needed.
				57	if range_header_str.lower().startswith('content-range:'):
				58	range_header_str = range_header_str.split(': ', 1)[1]
				59
				60	try:
				61	range_header = _ContentRangeHeader._make(
				62	_RANGE_HEADER_SEPARATORS.split(range_header_str)
				63	)
				64	size = int(range_header.end) - int(range_header.start) + 1
				65	except (IndexError, ValueError):
				66	raise FormatError('Wrong format of content range header: %s' %
				67	range_header_str)
				68
				69	try:
				70	filename = file_name_map[(range_header.start, size)]
				71	except KeyError:
				72	raise NoFileFoundError('Cannot find a file matches the range %s' %
				73	range_header_str)
				74
				75	return filename, size
				76
				77
				78	class JsonStreamer(object):
				79	"""A class to stream the responses for range requests.
				80
				81	The class accepts responses and format the file content in all of them as a
				82	JSON stream. The format:
				83	'{"<filename>": "<content>", "<filename>": "<content>", ...}'
				84	"""
				85
				86	def __init__(self):
				87	self._files_iter_list = []
				88	self._can_add_more_response = True
				89
				90	def queue_response(self, response, file_info_list):
				91	"""Add a reponse to the queue to be streamed as JSON.
				92
				93	We can add either:
				94	1. one and only one response for single-part range requests, or
				95	2. a series of responses for multi-part range requests.
				96
				97	Args:
				98	response: An instance of requests.Response, which may be the response of a
				99	single range request, or a multi-part range request.
				100	file_info_list: A list of tarfile_utils.TarMemberInfo. We use it to look
				101	up file name by content start offset and size.
				102
				103	Raises:
				104	FormatError: Raised when response to be queued isn't for a range request.
				105	ResponseQueueError: Raised when either queuing more than one response for
				106	single-part range request, or mixed responses for single-part and
				107	multi-part range request.
				108	"""
				109	if not self._can_add_more_response:
				110	raise ResponseQueueError(
				111	'No more reponses can be added when there was a response for '
				112	'single-part range request in the queue!')
				113
				114	file_name_map = {(f.content_start, int(f.size)): f.filename
				115	for f in file_info_list}
				116
				117	# Check if the response is for single range, or multi-part range. For a
				118	# single range request, the response must have header 'Content-Range'. For a
				119	# multi-part ranges request, the Content-Type header must be like
				120	# 'multipart/byteranges; ......'.
				121	content_range = response.headers.get('Content-Range', None)
				122	content_type = response.headers.get('Content-Type', '')
				123
				124	if content_range:
				125	if self._files_iter_list:
				126	raise ResponseQueueError(
				127	'Cannot queue more than one responses for single-part range '
				128	'request, or mix responses for single-part and multi-part.')
				129	filename, _ = _get_file_by_range_header(content_range, file_name_map)
				130	self._files_iter_list = [iter([(filename, response.content)])]
				131	self._can_add_more_response = False
				132
				133	elif content_type.startswith('multipart/byteranges;'):
				134	self._files_iter_list.append(
Congbin Guo	52f7cd0	2018-06-20 13:12:36 -0700	[diff] [blame]	135	_file_iterator(response, file_name_map))
Congbin Guo	3c6cc4b	2018-06-14 17:45:10 -0700	[diff] [blame]	136
				137	else:
				138	raise FormatError('The response is not for a range request.')
				139
				140	def stream(self):
				141	"""Yield the series of responses content as a JSON stream.
				142
				143	Yields:
				144	A JSON stream in format described above.
				145	"""
				146	files_iter = itertools.chain(*self._files_iter_list)
				147
				148	json_encoder = json.JSONEncoder()
				149	filename, content = next(files_iter)
				150	yield '{%s: %s' % (json_encoder.encode(filename),
				151	json_encoder.encode(content))
				152	for filename, content in files_iter:
				153	yield ', %s: %s' % (json_encoder.encode(filename),
				154	json_encoder.encode(content))
				155	yield '}'
				156
				157
def _data_reader(data_iter):
  """A coroutine to read data from |data_iter|.

  After being primed with next(), it accepts two types of request via send():
    1. _ONE_LINE: Read one CRLF ended line if possible.
    2. An integer N: Read at most N bytes.

  When |data_iter| is exhausted before a request can be fulfilled, whatever is
  still buffered is yielded as the answer and the coroutine finishes. If
  |data_iter| yields nothing at all, the coroutine finishes while being
  primed.

  Args:
    data_iter: An iterator of data source.

  Yields:
    The data read.
  """
  try:
    buffered = next(data_iter)
  except StopIteration:
    # Finish explicitly: a StopIteration leaking out of a generator body is
    # turned into RuntimeError where PEP 479 is in effect.
    return

  # Use a separator of the same type as the data, so that both byte strings
  # and unicode strings can be split.
  crlf = b'\r\n' if isinstance(buffered, bytes) else u'\r\n'

  # Get what to be read in runtime by passing value into the generator. See
  # https://docs.python.org/2.5/whatsnew/pep-342.html for syntax details.
  to_be_read = yield

  while True:
    if to_be_read is _ONE_LINE:
      parts = buffered.split(crlf, 1)
      if len(parts) == 2:
        # A complete line is buffered. Hand it out without the CRLF and keep
        # the rest for the next request.
        line, buffered = parts
        to_be_read = (yield line)
        continue

    else:  # Read at most |to_be_read| bytes of data.
      bytes_remaining = to_be_read - len(buffered)
      if bytes_remaining < 0:
        # More than enough is buffered. |bytes_remaining| is negative here, so
        # the slices below split off all but the last -|bytes_remaining|
        # bytes, i.e. the first |to_be_read| bytes.
        read_bytes = buffered[:bytes_remaining]
        buffered = buffered[bytes_remaining:]
        to_be_read = (yield read_bytes)
        continue

    # The request cannot be fulfilled from the buffer yet; fetch more data.
    try:
      buffered += next(data_iter)
    except StopIteration:
      break

  # The source is exhausted; answer the pending request with what is left.
  if buffered:
    yield buffered
				200
				201
				202	def _read_line(reader):
				203	"""Read one CRLF ended line from the response.
				204
				205	Returns:
				206	The line read. Return None if nothing to read.
				207	"""
				208	return reader.send(_ONE_LINE)
				209
				210
				211	def _read_empty_line(reader):
				212	"""Read one line and assert it is empty."""
				213	try:
				214	line = _read_line(reader)
				215	except StopIteration:
				216	raise FormatError('Expect an empty line, but got EOF.')
				217	if line:
				218	raise FormatError('Expect an empty line, but got "%s".' % line)
				219
				220
				221	def _read_bytes(reader, max_bytes):
				222	"""Read at most \|max_bytes\| bytes from the reader.
				223
				224	Args:
				225	reader:
				226	max_bytes: An integer of maximum bytes of bytes to read.
				227
				228	Returns:
				229	The bytes read. Return None if nothing to read.
				230	"""
				231	return reader.send(max_bytes)
				232
				233
				234	def _file_iterator(response, file_name_map):
Congbin Guo	3c6cc4b	2018-06-14 17:45:10 -0700	[diff] [blame]	235	"""The iterator of files in a response of multi-part range request.
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	236
				237	An example response is like:
				238
				239	HTTP/1.1 206 Partial Content
				240	Content-Type: multipart/byteranges; boundary=magic_string
				241	Content-Length: 282
				242
				243	--magic_string
				244	Content-Type: text/html
				245	Content-Range: bytes 0-50/1270
				246
				247	<data>
				248	--magic_string
				249	Content-Type: text/html
				250	Content-Range: bytes 100-150/1270
				251
				252	<data>
				253	--magic_string--
				254
				255	In our application, each part is the content of a file. This class iterates
				256	the files.
Congbin Guo	52f7cd0	2018-06-20 13:12:36 -0700	[diff] [blame]	257
				258	Args:
				259	response: An instance of requests.response.
				260	file_name_map: A dict of {(<start:str>, <size:int>): filename, ...}.
				261
				262	Yields:
				263	A pair of (name, content) of the file.
				264
				265	Raises:
				266	FormatError: Raised when response content interrupted.
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	267	"""
Congbin Guo	52f7cd0	2018-06-20 13:12:36 -0700	[diff] [blame]	268	reader = _data_reader(
				269	response.iter_content(constants.READ_BUFFER_SIZE_BYTES))
				270	reader.next() # initialize the coroutine
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	271
Congbin Guo	52f7cd0	2018-06-20 13:12:36 -0700	[diff] [blame]	272	_read_empty_line(reader) # The first line is empty.
				273	while True:
				274	_read_line(reader) # The second line is the boundary.
				275	_read_line(reader) # The line sub content type.
				276	sub_range_header = _read_line(reader) # The line of sub content range.
				277	if sub_range_header is None:
				278	break
				279	_read_empty_line(reader) # Another empty line.
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	280
Congbin Guo	52f7cd0	2018-06-20 13:12:36 -0700	[diff] [blame]	281	filename, size = _get_file_by_range_header(sub_range_header,
				282	file_name_map)
				283	content = _read_bytes(reader, size)
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	284
Congbin Guo	52f7cd0	2018-06-20 13:12:36 -0700	[diff] [blame]	285	_read_empty_line(reader) # Every content has a trailing '\r\n'.
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	286
Congbin Guo	52f7cd0	2018-06-20 13:12:36 -0700	[diff] [blame]	287	bytes_read = 0 if content is None else len(content)
				288	if bytes_read != size:
				289	raise FormatError(
				290	'%s: Error in reading content (read %d B, expect %d B)' %
				291	(filename, bytes_read, size)
				292	)
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	293
Congbin Guo	52f7cd0	2018-06-20 13:12:36 -0700	[diff] [blame]	294	yield filename, content