Blame - gs_cache/range_response.py - chromium.googlesource.com/chromiumos/platform/dev-util

blob: 8b3e45e76942d9818bfc347bb8d487510c8d3bb3 [file] [log] [blame]

Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	1	# -*- coding: utf-8 -*-
				2	# Copyright 2018 The Chromium OS Authors. All rights reserved.
				3	# Use of this source code is governed by a BSD-style license that can be
				4	# found in the LICENSE file.
				5	"""This module provides utils to handle response of "Range Request"."""
				6
				7	from __future__ import absolute_import
				8	from __future__ import division
				9	from __future__ import print_function
				10
Congbin Guo	3c6cc4b	2018-06-14 17:45:10 -0700	[diff] [blame]	11	import collections
				12	import itertools
				13	import json
				14	import re
				15
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	16	import constants
				17
# Any of '-', '/' or ' ' separates the fields of a Content-Range value such as
# 'bytes <start>-<end>/<total>'.
_RANGE_HEADER_SEPARATORS = re.compile('[-/ ]')

# The four fields of a Content-Range value, in the order they appear. All of
# them are kept as the strings produced by the split above.
_ContentRangeHeader = collections.namedtuple(
    '_ContentRangeHeader', ['bytes', 'start', 'end', 'total'])
				22
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	23
				24	class FormatError(Exception):
				25	"""Exception raised when we parse wrong format of response."""
				26
				27
				28	class NoFileFoundError(Exception):
				29	"""Exception raised when we cannot get a file match the range."""
				30
				31
class ResponseQueueError(Exception):
  """Raised when a response is queued in a way that is not allowed."""
				34
				35
				36	def _get_file_by_range_header(range_header_str, file_name_map):
				37	"""Get file name and size by the Content-Range header.
				38
				39	The format of Content-Range header is like:
				40	Content-Range: bytes <start>-<end>/<total>
				41	We get the <start> and <end> from it and retrieve the file name from
				42	\|file_name_map\|.
				43
				44	Args:
				45	range_header_str: A string of range header.
				46	file_name_map: A dict of {(<start:str>, <size:int>): filename, ...}.
				47
				48	Returns:
				49	A tuple of (filename, size).
				50
				51	Raises:
				52	FormatError: Raised when response content interrupted.
				53	NoFileFoundError: Raised when we cannot get a file matches the range.
				54	"""
				55	# Split the part of 'Content-Range:' first if needed.
				56	if range_header_str.lower().startswith('content-range:'):
				57	range_header_str = range_header_str.split(': ', 1)[1]
				58
				59	try:
				60	range_header = _ContentRangeHeader._make(
				61	_RANGE_HEADER_SEPARATORS.split(range_header_str)
				62	)
				63	size = int(range_header.end) - int(range_header.start) + 1
				64	except (IndexError, ValueError):
				65	raise FormatError('Wrong format of content range header: %s' %
				66	range_header_str)
				67
				68	try:
				69	filename = file_name_map[(range_header.start, size)]
				70	except KeyError:
				71	raise NoFileFoundError('Cannot find a file matches the range %s' %
				72	range_header_str)
				73
				74	return filename, size
				75
				76
				77	class JsonStreamer(object):
				78	"""A class to stream the responses for range requests.
				79
				80	The class accepts responses and format the file content in all of them as a
				81	JSON stream. The format:
				82	'{"<filename>": "<content>", "<filename>": "<content>", ...}'
				83	"""
				84
				85	def __init__(self):
				86	self._files_iter_list = []
				87	self._can_add_more_response = True
				88
				89	def queue_response(self, response, file_info_list):
				90	"""Add a reponse to the queue to be streamed as JSON.
				91
				92	We can add either:
				93	1. one and only one response for single-part range requests, or
				94	2. a series of responses for multi-part range requests.
				95
				96	Args:
				97	response: An instance of requests.Response, which may be the response of a
				98	single range request, or a multi-part range request.
				99	file_info_list: A list of tarfile_utils.TarMemberInfo. We use it to look
				100	up file name by content start offset and size.
				101
				102	Raises:
				103	FormatError: Raised when response to be queued isn't for a range request.
				104	ResponseQueueError: Raised when either queuing more than one response for
				105	single-part range request, or mixed responses for single-part and
				106	multi-part range request.
				107	"""
				108	if not self._can_add_more_response:
				109	raise ResponseQueueError(
				110	'No more reponses can be added when there was a response for '
				111	'single-part range request in the queue!')
				112
				113	file_name_map = {(f.content_start, int(f.size)): f.filename
				114	for f in file_info_list}
				115
				116	# Check if the response is for single range, or multi-part range. For a
				117	# single range request, the response must have header 'Content-Range'. For a
				118	# multi-part ranges request, the Content-Type header must be like
				119	# 'multipart/byteranges; ......'.
				120	content_range = response.headers.get('Content-Range', None)
				121	content_type = response.headers.get('Content-Type', '')
				122
				123	if content_range:
				124	if self._files_iter_list:
				125	raise ResponseQueueError(
				126	'Cannot queue more than one responses for single-part range '
				127	'request, or mix responses for single-part and multi-part.')
				128	filename, _ = _get_file_by_range_header(content_range, file_name_map)
				129	self._files_iter_list = [iter([(filename, response.content)])]
				130	self._can_add_more_response = False
				131
				132	elif content_type.startswith('multipart/byteranges;'):
				133	self._files_iter_list.append(
				134	iter(_FileIterator(response, file_name_map)))
				135
				136	else:
				137	raise FormatError('The response is not for a range request.')
				138
				139	def stream(self):
				140	"""Yield the series of responses content as a JSON stream.
				141
				142	Yields:
				143	A JSON stream in format described above.
				144	"""
				145	files_iter = itertools.chain(*self._files_iter_list)
				146
				147	json_encoder = json.JSONEncoder()
				148	filename, content = next(files_iter)
				149	yield '{%s: %s' % (json_encoder.encode(filename),
				150	json_encoder.encode(content))
				151	for filename, content in files_iter:
				152	yield ', %s: %s' % (json_encoder.encode(filename),
				153	json_encoder.encode(content))
				154	yield '}'
				155
				156
				157	class _FileIterator(object):
				158	"""The iterator of files in a response of multi-part range request.
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	159
				160	An example response is like:
				161
				162	HTTP/1.1 206 Partial Content
				163	Content-Type: multipart/byteranges; boundary=magic_string
				164	Content-Length: 282
				165
				166	--magic_string
				167	Content-Type: text/html
				168	Content-Range: bytes 0-50/1270
				169
				170	<data>
				171	--magic_string
				172	Content-Type: text/html
				173	Content-Range: bytes 100-150/1270
				174
				175	<data>
				176	--magic_string--
				177
				178	In our application, each part is the content of a file. This class iterates
				179	the files.
				180	"""
				181
Congbin Guo	3c6cc4b	2018-06-14 17:45:10 -0700	[diff] [blame]	182	def __init__(self, response, file_name_map):
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	183	"""Constructor.
				184
				185	Args:
				186	response: An instance of requests.response.
Congbin Guo	3c6cc4b	2018-06-14 17:45:10 -0700	[diff] [blame]	187	file_name_map: A dict of {(<start:str>, <size:int>): filename, ...}.
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	188	"""
				189	self._response_iter = response.iter_content(
				190	constants.READ_BUFFER_SIZE_BYTES)
				191	self._chunk = None
Congbin Guo	3c6cc4b	2018-06-14 17:45:10 -0700	[diff] [blame]	192	self._file_name_map = file_name_map
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	193
				194	def __iter__(self):
				195	self._chunk = next(self._response_iter)
				196	return self._iter_files()
				197
				198	def _read_next_chunk(self):
				199	"""Helper function to read next chunk of data and return current chunk."""
				200	buffered = self._chunk
				201	try:
				202	self._chunk = next(self._response_iter)
				203	except StopIteration:
				204	self._chunk = None
				205
				206	return buffered
				207
				208	def _read_line(self):
				209	"""Read one CRLF ended line from the response.
				210
				211	Returns:
				212	The line read. Return None if nothing to read.
				213	"""
				214	if self._chunk is None:
				215	return None
				216
				217	buffered = ''
				218	while True:
				219	buffered += self._chunk
				220	parts = buffered.split('\r\n', 1)
				221	if len(parts) == 2:
				222	line, self._chunk = parts
				223	return line
				224	else: # No '\r\n' in current chunk. Read one more.
				225	self._read_next_chunk()
				226	if self._chunk is None:
				227	return buffered
				228
				229	def _read_bytes(self, max_bytes):
				230	"""Read at most \|max_bytes\| bytes from the response.
				231
				232	Args:
				233	max_bytes: An integer of maximum bytes of bytes to read.
				234
				235	Returns:
				236	The bytes read. Return None if nothing to read.
				237	"""
				238	if self._chunk is None:
				239	return None
				240
				241	buffered = ''
				242	bytes_remaining = max_bytes
				243	while True:
				244	bytes_remaining -= len(self._chunk)
				245	if bytes_remaining < 0:
				246	buffered += self._chunk[:bytes_remaining]
				247	self._chunk = self._chunk[bytes_remaining:]
				248	return buffered
				249
				250	buffered += self._read_next_chunk()
				251	if self._chunk is None:
				252	return buffered
				253
				254	def _read_empty_line(self):
				255	"""Read one line and assert it is empty."""
				256	line = self._read_line()
				257	if line is None:
				258	raise FormatError('Expect an empty line, but got EOF.')
				259
				260	if line:
				261	raise FormatError('Expect an empty line, but got "%s".' % line)
				262
				263	def _iter_files(self):
				264	"""Iterate the files in the response.
				265
				266	Yields:
				267	A pair of (name, content) of the file.
				268
				269	Raises:
				270	FormatError: Raised when response content interrupted.
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	271	"""
				272	self._read_empty_line() # The first line is empty.
				273	while True:
				274	self._read_line() # The second line is the boundary.
				275	self._read_line() # The line sub content type.
				276	sub_range_header = self._read_line() # The line of sub content range.
				277	if sub_range_header is None:
				278	break
				279	self._read_empty_line() # Another empty line.
				280
Congbin Guo	3c6cc4b	2018-06-14 17:45:10 -0700	[diff] [blame]	281	filename, size = _get_file_by_range_header(sub_range_header,
				282	self._file_name_map)
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	283	content = self._read_bytes(size)
Congbin Guo	3c6cc4b	2018-06-14 17:45:10 -0700	[diff] [blame]	284
Congbin Guo	c427758	2018-06-06 16:44:48 -0700	[diff] [blame]	285	self._read_empty_line() # Every content has a trailing '\r\n'.
				286
				287	bytes_read = 0 if content is None else len(content)
				288	if bytes_read != size:
				289	raise FormatError(
				290	'%s: Error in reading content (read %d B, expect %d B)' %
				291	(filename, bytes_read, size)
				292	)
				293
				294	yield filename, content