Blame - isolateserver.py - chromium.googlesource.com/infra/luci/client-py

blob: 904d242884a695edb7091f9b428d3e2e8c38fe09 [file] [log] [blame]

maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	1	#!/usr/bin/env python
Marc-Antoine Ruel	8add124	2013-11-05 17:28:27 -0500	[diff] [blame]	2	# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruel	e98b112	2013-11-05 20:27:57 -0500	[diff] [blame]	3	# Use of this source code is governed under the Apache License, Version 2.0 that
				4	# can be found in the LICENSE file.
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	5
Marc-Antoine Ruel	52436aa	2014-08-28 21:57:57 -0400	[diff] [blame]	6	"""Archives a set of files or directories to an Isolate Server."""
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	7
Cory Massaro	cc19c8c	2015-03-10 13:35:11 -0700	[diff] [blame]	8	__version__ = '0.4.3'
maruel@chromium.org	fb78d43	2013-08-28 21:22:40 +0000	[diff] [blame]	9
Cory Massaro	cc19c8c	2015-03-10 13:35:11 -0700	[diff] [blame]	10	import base64
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	11	import functools
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	12	import logging
Marc-Antoine Ruel	a57d7db	2014-10-15 20:31:19 -0400	[diff] [blame]	13	import optparse
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	14	import os
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	15	import re
Vadim Shtayura	f9e401b	2014-10-15 18:19:37 +0400	[diff] [blame]	16	import signal
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	17	import sys
Marc-Antoine Ruel	fcc3cd8	2013-11-19 16:31:38 -0500	[diff] [blame]	18	import tempfile
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	19	import threading
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	20	import time
Marc-Antoine Ruel	e98dde9	2015-01-22 14:53:05 -0500	[diff] [blame]	21	import types
maruel@chromium.org	e82112e	2013-04-24 14:41:55 +0000	[diff] [blame]	22	import urllib
Marc-Antoine Ruel	1687b5e	2014-02-06 17:47:53 -0500	[diff] [blame]	23	import urlparse
csharp@chromium.org	59c7bcf	2012-11-21 21:13:18 +0000	[diff] [blame]	24	import zlib
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	25
maruel@chromium.org	fb78d43	2013-08-28 21:22:40 +0000	[diff] [blame]	26	from third_party import colorama
				27	from third_party.depot_tools import fix_encoding
				28	from third_party.depot_tools import subcommand
				29
Marc-Antoine Ruel	3798993	2013-11-19 16:28:08 -0500	[diff] [blame]	30	from utils import file_path
Marc-Antoine Ruel	f74cffe	2015-07-15 15:21:34 -0400	[diff] [blame^]	31	from utils import logging_utils
Marc-Antoine Ruel	e4ad07e	2014-10-15 20:22:29 -0400	[diff] [blame]	32	from utils import lru
vadimsh@chromium.org	6b70621	2013-08-28 15:03:46 +0000	[diff] [blame]	33	from utils import net
Marc-Antoine Ruel	cfb6085	2014-07-02 15:22:00 -0400	[diff] [blame]	34	from utils import on_error
vadimsh@chromium.org	b074b16	2013-08-22 17:55:46 +0000	[diff] [blame]	35	from utils import threading_utils
vadimsh@chromium.org	a432647	2013-08-24 02:05:41 +0000	[diff] [blame]	36	from utils import tools
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	37
Vadim Shtayura	e34e13a	2014-02-02 11:23:26 -0800	[diff] [blame]	38	import auth
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame]	39	import isolated_format
Vadim Shtayura	e34e13a	2014-02-02 11:23:26 -0800	[diff] [blame]	40
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	41
# Version of isolate protocol passed to the server in /handshake request.
ISOLATE_PROTOCOL_VERSION = '1.0'


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files. is_valid_file() only checks that the
# file exists when it is given this value.
UNKNOWN_FILE_SIZE = None


# Maximum expected delay (in seconds) between successive file fetches or uploads
# in Storage. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
# The value below is 5 minutes.
DEADLOCK_TIMEOUT = 5 * 60
				55
				56
# The number of files to check the isolate server per /pre-upload query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: the larger the file, the larger
# the possibility it has changed). Then the first ITEMS_PER_CONTAINS_QUERIES[0]
# files are taken and sent to '/pre-upload', then the next
# ITEMS_PER_CONTAINS_QUERIES[1], and so on. The numbers here are a trade-off; the
# more per request, the lower the effect of HTTP round trip latency and
# TCP-level chattiness. On the other hand, larger values cause longer lookups,
# increasing the initial latency to start uploading, which is especially an
# issue for large files. This value is optimized for the "few thousands files to
# look up with minimal number of large files missing" case.
ITEMS_PER_CONTAINS_QUERIES = (20, 20, 50, 50, 50, 100)
csharp@chromium.org	07fa759	2013-01-11 18:19:30 +0000	[diff] [blame]	69
maruel@chromium.org	9958e4a	2013-09-17 00:01:48 +0000	[diff] [blame]	70
# A list of already compressed extension types that should not receive any
# compression before being uploaded. Entries are bare, lower case extensions
# without the leading dot.
ALREADY_COMPRESSED_TYPES = [
  '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'mp4', 'pdf',
  'png', 'wav', 'zip',
]
				77
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	78
# Chunk size (in bytes, 16 KiB) to use when reading from network stream.
NET_IO_FILE_CHUNK = 16 * 1024


# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout, the whole download will be
# aborted.
DOWNLOAD_READ_TIMEOUT = 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30
				92
				93
# Regular expressions, anchored on both ends, describing paths that are
# blacklisted by default. The '|' alternations must stay unescaped: an escaped
# '\|' matches a literal pipe character and the patterns would then never match
# names such as 'foo.pyc' or '.git'.
DEFAULT_BLACKLIST = (
  # Temporary vim or python files.
  r'^.+\.(?:pyc|swp)$',
  # .git or .svn directory, either at the root or below any parent directory.
  r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
)
				100
				101
# A class to use to communicate with the server by default. Can be changed by
# 'set_storage_api_class'. None here means that the default, IsolateServer, is
# used.
_storage_api_cls = None
				105
				106
class Error(Exception):
  """Base class for the runtime errors raised by this module."""
				110
				111
class Aborted(Error):
  """Raised when an operation is abandoned because it was aborted."""
				115
				116
def stream_read(stream, chunk_size):
  """Yields successive chunks read from |stream| until it is exhausted.

  Arguments:
    stream: object exposing a read(size) method.
    chunk_size: size requested from |stream| on every read() call.

  Iteration stops at the first read() that returns an empty (falsy) value.
  """
  chunk = stream.read(chunk_size)
  while chunk:
    yield chunk
    chunk = stream.read(chunk_size)
				124
				125
def file_read(filepath, chunk_size=isolated_format.DISK_FILE_CHUNK, offset=0):
  """Yields the content of a file in chunks of at most |chunk_size| bytes.

  Arguments:
    filepath: path of the file to read; it is opened in binary mode.
    chunk_size: size requested on every read() call.
    offset: position to seek to before reading; 0 reads from the start.
  """
  with open(filepath, 'rb') as src:
    if offset:
      src.seek(offset)
    block = src.read(chunk_size)
    while block:
      yield block
      block = src.read(chunk_size)
				136
				137
def file_write(filepath, content_generator):
  """Writes file content as generated by content_generator.

  Creates the intermediary directory as needed.

  Arguments:
    filepath: destination path; the file is opened in binary mode and
        overwritten if it already exists.
    content_generator: iterable of byte chunks to write in order.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  filedir = os.path.dirname(filepath)
  # |filedir| is empty for a bare file name (file in the current directory);
  # os.makedirs('') raises, so only create the directory when there is one.
  if filedir and not os.path.isdir(filedir):
    os.makedirs(filedir)
  total = 0
  with open(filepath, 'wb') as f:
    for d in content_generator:
      total += len(d)
      f.write(d)
  return total
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	156
				157
def zip_compress(content_generator, level=7):
  """Yields zlib compressed chunks for the data read from |content_generator|.

  Arguments:
    content_generator: iterable of byte chunks to compress.
    level: zlib compression level handed to zlib.compressobj().

  Empty outputs of the compressor are never yielded.
  """
  deflater = zlib.compressobj(level)

  def _raw_output():
    # Output for every input chunk, followed by whatever the final flush emits.
    for piece in content_generator:
      yield deflater.compress(piece)
    yield deflater.flush(zlib.Z_FINISH)

  for packed in _raw_output():
    if packed:
      yield packed
				168
				169
def zip_decompress(
    content_generator, chunk_size=isolated_format.DISK_FILE_CHUNK):
  """Yields decompressed data for the zipped data read from |content_generator|.

  Every call to the decompressor is limited to |chunk_size| bytes of output so
  that a zip bomb doesn't cause zlib to preallocate a huge amount of memory.

  Raises IOError if zlib reports an error or if some input is left over once
  the stream has been processed.
  """
  inflater = zlib.decompressobj()
  bytes_in = 0
  try:
    for piece in content_generator:
      bytes_in += len(piece)
      # Feed the new chunk once, then keep feeding whatever input zlib left
      # unconsumed because of the output size limit.
      pending = piece
      while True:
        plain = inflater.decompress(pending, chunk_size)
        if plain:
          yield plain
        pending = inflater.unconsumed_tail
        if not pending:
          break
    leftover = inflater.flush()
    if leftover:
      yield leftover
  except zlib.error as e:
    raise IOError(
        'Corrupted zip stream (read %d bytes) - %s' % (bytes_in, e))
  # Ensure all data was read and decompressed.
  if inflater.unused_data or inflater.unconsumed_tail:
    raise IOError('Not all data was decompressed')
				200
				201
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (no compression) when the extension of |filename| is listed in
  ALREADY_COMPRESSED_TYPES and 7 otherwise. The comparison is case insensitive.
  """
  # os.path.splitext() keeps the leading dot ('.zip') while
  # ALREADY_COMPRESSED_TYPES lists bare extensions ('zip'); drop the dot so the
  # lookup can actually match.
  file_ext = os.path.splitext(filename)[1].lower()[1:]
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
				207
				208
def create_directories(base_directory, files):
  """Creates, under |base_directory|, every directory the given files need.

  Arguments:
    base_directory: directory under which the tree is created.
    files: sized collection of relative file paths.

  Each parent directory of each file is created with os.mkdir(), parents before
  children.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect every ancestor directory of every file.
  needed = set()
  for path in files:
    ancestor = os.path.dirname(path)
    while ancestor:
      needed.add(ancestor)
      ancestor = os.path.dirname(ancestor)
  # Sorting guarantees a parent is created before its children.
  for relative_dir in sorted(needed):
    os.mkdir(os.path.join(base_directory, relative_dir))
				221
				222
def create_symlinks(base_directory, files):
  """Creates the symlinks described by the given (path, properties) pairs.

  Arguments:
    base_directory: directory the link paths are relative to.
    files: iterable of (filepath, properties) pairs; only entries whose
        properties contain the key 'l' (the link target) are processed.

  On Windows the symlinks are skipped with a warning.
  """
  for filepath, properties in files:
    if 'l' in properties:
      if sys.platform == 'win32':
        # TODO(maruel): Create symlink via the win32 api.
        logging.warning('Ignoring symlink %s', filepath)
      else:
        link_path = os.path.join(base_directory, filepath)
        # os.symlink() doesn't exist on Windows.
        os.symlink(properties['l'], link_path)  # pylint: disable=E1101
maruel@chromium.org	af25485	2013-09-17 17:48:14 +0000	[diff] [blame]	235
				236
def is_valid_file(filepath, size):
  """Tells whether the file at |filepath| looks valid.

  When |size| is UNKNOWN_FILE_SIZE the file only has to exist as a regular
  file. Otherwise its size on disk must be equal to |size|; a mismatch is
  logged as a warning.
  """
  if size == UNKNOWN_FILE_SIZE:
    return os.path.isfile(filepath)
  size_on_disk = os.stat(filepath).st_size
  if size == size_on_disk:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), size_on_disk, size)
  return False
				251
				252
class Item(object):
  """Something that can be pushed to Storage.

  The digest and the size can be given up front when they are known. When they
  are not, prepare() derives them from content(). A digest given up front MUST
  correspond to the hash algorithm used by Storage.

  When used with Storage, an Item starts its life in the main thread, travels
  to the 'contains' thread, then to the 'push' thread and finally comes back
  to the main thread. It is never used concurrently from multiple threads.
  """

  def __init__(self, digest=None, size=None, high_priority=False):
    self.compression_level = 6
    self.high_priority = high_priority
    self.size = size
    self.digest = digest

  def content(self):
    """Iterable with content of this item as byte string (str) chunks.

    Must be implemented by subclasses.
    """
    raise NotImplementedError()

  def prepare(self, hash_algo):
    """Makes sure both self.digest and self.size are set.

    Does nothing when both are already known. Otherwise the whole content() is
    read to compute them.

    Arguments:
      hash_algo: hash algorithm to use to calculate digest.
    """
    if self.digest is not None and self.size is not None:
      return
    hasher = hash_algo()
    length = 0
    for chunk in self.content():
      hasher.update(chunk)
      length += len(chunk)
    self.digest = hasher.hexdigest()
    self.size = length
vadimsh@chromium.org	bcb966b	2013-10-01 18:14:18 +0000	[diff] [blame]	292
				293
				294	class FileItem(Item):
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	295	"""A file to push to Storage.
vadimsh@chromium.org	bcb966b	2013-10-01 18:14:18 +0000	[diff] [blame]	296
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	297	Its digest and size may be provided in advance, if known. Otherwise they will
				298	be derived from the file content.
				299	"""
				300
				301	def __init__(self, path, digest=None, size=None, high_priority=False):
				302	super(FileItem, self).__init__(
				303	digest,
				304	size if size is not None else os.stat(path).st_size,
				305	high_priority)
vadimsh@chromium.org	bcb966b	2013-10-01 18:14:18 +0000	[diff] [blame]	306	self.path = path
				307	self.compression_level = get_zip_compression_level(path)
				308
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	309	def content(self):
				310	return file_read(self.path)
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	311
				312
class BufferItem(Item):
  """An Item whose content is a byte buffer held in memory."""

  def __init__(self, buf, high_priority=False):
    # The size is known right away; the digest is left for prepare().
    super(BufferItem, self).__init__(
        digest=None, size=len(buf), high_priority=high_priority)
    self.buffer = buf

  def content(self):
    """Returns the whole buffer as a single chunk."""
    return [self.buffer]
				322
				323
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	324	class Storage(object):
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	325	"""Efficiently downloads or uploads large set of files via StorageApi.
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	326
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	327	Implements compression support, parallel 'contains' checks, parallel uploads
				328	and more.
				329
				330	Works only within single namespace (and thus hashing algorithm and compression
				331	scheme are fixed).
				332
Vadim Shtayura	f9e401b	2014-10-15 18:19:37 +0400	[diff] [blame]	333	Spawns multiple internal threads. Thread safe, but not fork safe. Modifies
				334	signal handlers table to handle Ctrl+C.
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	335	"""
				336
def __init__(self, storage_api):
  """Wraps |storage_api|; thread pools are only created when first used.

  Arguments:
    storage_api: object used to talk to the storage; its |namespace|
        selects whether compression is used and which hash algorithm.
  """
  self._storage_api = storage_api
  # Thread pools are created lazily by the matching properties.
  self._cpu_thread_pool = None
  self._net_thread_pool = None
  # Abort flag and signal handlers to restore when leaving the context.
  self._aborted = False
  self._prev_sig_handlers = {}
  self._use_zip = isolated_format.is_namespace_with_compression(
      storage_api.namespace)
  self._hash_algo = isolated_format.get_hash_algo(storage_api.namespace)
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	346
@property
def hash_algo(self):
  """Hash algorithm used to name the files in storage after their content.

  It is derived from |namespace|. See also isolated_format.get_hash_algo().
  """
  algo = self._hash_algo
  return algo
				354
				355	@property
				356	def location(self):
Marc-Antoine Ruel	b10edf2	2014-12-11 13:33:57 -0500	[diff] [blame]	357	"""URL of the backing store that this class is using."""
Vadim Shtayura	e0ab190	2014-04-29 10:55:27 -0700	[diff] [blame]	358	return self._storage_api.location
				359
				360	@property
				361	def namespace(self):
				362	"""Isolate namespace used by this storage.
				363
				364	Indirectly defines hashing scheme and compression method used.
				365	"""
				366	return self._storage_api.namespace
				367
				368	@property
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	369	def cpu_thread_pool(self):
				370	"""ThreadPool for CPU-bound tasks like zipping."""
				371	if self._cpu_thread_pool is None:
Marc-Antoine Ruel	bdad118	2015-02-06 16:04:35 -0500	[diff] [blame]	372	threads = max(threading_utils.num_processors(), 2)
				373	if sys.maxsize <= 2L**32:
				374	# On 32 bits userland, do not try to use more than 16 threads.
				375	threads = min(threads, 16)
				376	self._cpu_thread_pool = threading_utils.ThreadPool(2, threads, 0, 'zip')
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	377	return self._cpu_thread_pool
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	378
@property
def net_thread_pool(self):
  """AutoRetryThreadPool for IO-bound tasks, retries IOError.

  The pool is created on first access and reused afterwards.
  """
  pool = self._net_thread_pool
  if pool is None:
    pool = threading_utils.IOAutoRetryThreadPool()
    self._net_thread_pool = pool
  return pool
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	385
def close(self):
  """Waits for all pending tasks to finish and drops the thread pools.

  Each pool that was created is joined, closed and then forgotten, the CPU
  pool first and the network pool second.
  """
  logging.info('Waiting for all threads to die...')
  for attr in ('_cpu_thread_pool', '_net_thread_pool'):
    pool = getattr(self, attr)
    if pool:
      pool.join()
      pool.close()
      setattr(self, attr, None)
  logging.info('Done.')
				398
				399	def abort(self):
				400	"""Cancels any pending or future operations."""
				401	# This is not strictly theadsafe, but in the worst case the logging message
				402	# will be printed twice. Not a big deal. In other places it is assumed that
				403	# unprotected reads and writes to _aborted are serializable (it is true
				404	# for python) and thus no locking is used.
				405	if not self._aborted:
				406	logging.warning('Aborting... It can take a while.')
				407	self._aborted = True
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	408
def __enter__(self):
  """Context manager interface.

  Installs SIGINT and SIGTERM handlers that call abort() and remembers the
  handlers that were previously installed. Returns self.
  """
  assert not self._prev_sig_handlers, self._prev_sig_handlers

  def _abort_on_signal(*_args):
    return self.abort()

  for signum in (signal.SIGINT, signal.SIGTERM):
    previous = signal.signal(signum, _abort_on_signal)
    self._prev_sig_handlers[signum] = previous
  return self
				415
				416	def __exit__(self, _exc_type, _exc_value, _traceback):
				417	"""Context manager interface."""
				418	self.close()
Vadim Shtayura	f9e401b	2014-10-15 18:19:37 +0400	[diff] [blame]	419	while self._prev_sig_handlers:
				420	s, h = self._prev_sig_handlers.popitem()
				421	signal.signal(s, h)
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	422	return False
				423
def upload_items(self, items):
  """Uploads a bunch of items to the isolate server.

  It figures out what items are missing from the server and uploads only them.
  Items sharing the same digest are uploaded once. Statistics about cache hits
  and misses are logged at the end.

  Arguments:
    items: list of Item instances that represents data to upload. May be
        empty, in which case nothing is uploaded.

  Returns:
    List of items that were uploaded. All other items are already there.
  """
  logging.info('upload_items(items=%d)', len(items))

  # Ensure all digests are calculated.
  for item in items:
    item.prepare(self._hash_algo)

  # For each digest keep only first Item that matches it. All other items
  # are just indistinguishable copies from the point of view of isolate
  # server (it doesn't care about paths at all, only content and digests).
  seen = {}
  duplicates = 0
  for item in items:
    if seen.setdefault(item.digest, item) is not item:
      duplicates += 1
  # Materialized as a list so it can be sized and iterated several times.
  items = list(seen.values())
  if duplicates:
    logging.info('Skipped %d files with duplicated content', duplicates)

  # Enqueue all upload tasks.
  missing = set()
  uploaded = []
  channel = threading_utils.TaskChannel()
  for missing_item, push_state in self.get_missing_items(items):
    missing.add(missing_item)
    self.async_push(channel, missing_item, push_state)

  # No need to spawn deadlock detector thread if there's nothing to upload.
  if missing:
    with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
      # Wait for all started uploads to finish.
      while len(uploaded) != len(missing):
        detector.ping()
        item = channel.pull()
        uploaded.append(item)
        logging.debug(
            'Uploaded %d / %d: %s', len(uploaded), len(missing), item.digest)
  logging.info('All files are uploaded')

  # Print stats. Both percentages are guarded against a zero denominator:
  # |total| is 0 when no item was given and |total_size| is 0 when every item
  # is empty.
  total = len(items)
  total_size = sum(f.size for f in items)
  logging.info(
      'Total: %6d, %9.1fkb',
      total,
      total_size / 1024.)
  cache_hit = set(items) - missing
  cache_hit_size = sum(f.size for f in cache_hit)
  logging.info(
      'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
      len(cache_hit),
      cache_hit_size / 1024.,
      len(cache_hit) * 100. / total if total else 0,
      cache_hit_size * 100. / total_size if total_size else 0)
  cache_miss = missing
  cache_miss_size = sum(f.size for f in cache_miss)
  logging.info(
      'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
      len(cache_miss),
      cache_miss_size / 1024.,
      len(cache_miss) * 100. / total if total else 0,
      cache_miss_size * 100. / total_size if total_size else 0)

  return uploaded
				498
def get_fetch_url(self, item):
  """Returns the URL the given item can be fetched from once it is uploaded.

  The item is prepared first so that its digest is known. Note that if the
  namespace uses compression, the data at the returned URL is compressed.

  Arguments:
    item: Item to get fetch URL for.

  Returns:
    An URL or None if underlying protocol doesn't support this.
  """
  item.prepare(self._hash_algo)
  digest = item.digest
  return self._storage_api.get_fetch_url(digest)
vadimsh@chromium.org	f24e5c3	2013-10-11 21:16:21 +0000	[diff] [blame]	512
def async_push(self, channel, item, push_state):
  """Schedules an upload of |item| to the server on a parallel thread.

  Can be used only after |item| was checked for presence on a server with
  'get_missing_items' call. 'get_missing_items' returns |push_state| object
  that contains storage specific information describing how to upload
  the item (for example in case of cloud storage, it is signed upload URLs).

  Arguments:
    channel: TaskChannel that receives back |item| when upload ends.
    item: item to upload as instance of Item class.
    push_state: push state returned by 'get_missing_items' call for |item|.

  Returns:
    None, but |channel| later receives back |item| when upload ends.
  """
  # Thread pool task priority, shared by the zip and the network tasks.
  if item.high_priority:
    priority = threading_utils.PRIORITY_HIGH
  else:
    priority = threading_utils.PRIORITY_MED

  def push(content):
    """Pushes an Item and returns it to |channel|."""
    if self._aborted:
      raise Aborted()
    item.prepare(self._hash_algo)
    self._storage_api.push(item, push_state, content)
    return item

  def zip_and_push():
    """Compresses the whole item, then enqueues the network push."""
    # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
    # content right here. It will block until all file is zipped.
    try:
      if self._aborted:
        raise Aborted()
      data = ''.join(zip_compress(item.content(), item.compression_level))
    except Exception as exc:
      logging.error('Failed to zip \'%s\': %s', item, exc)
      # The failure is reported through |channel| instead of being raised
      # inside the zipping task.
      channel.send_exception()
      return
    self.net_thread_pool.add_task_with_channel(
        channel, priority, push, [data])

  if self._use_zip:
    # Zipping is enabled: compress in a separate (CPU pool) thread first.
    self.cpu_thread_pool.add_task(priority, zip_and_push)
  else:
    # Zipping is not required: just start a push task with the raw content.
    self.net_thread_pool.add_task_with_channel(
        channel, priority, push, item.content())
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	564
def push(self, item, push_state):
  """Uploads one item to the server and blocks until the upload is done.

  If you need to push many items at once, consider using 'upload_items' or
  'async_push' with instance of TaskChannel.

  Arguments:
    item: item to upload as instance of Item class.
    push_state: push state returned by 'get_missing_items' call for |item|.

  Returns:
    Pushed item (same object as |item|).
  """
  # A private channel is used so that only this upload's result is pulled.
  result_channel = threading_utils.TaskChannel()
  with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT):
    self.async_push(result_channel, item, push_state)
    received = result_channel.pull()
    assert received is item
  return item
				584
def async_fetch(self, channel, priority, digest, size, sink):
  """Schedules a download of an item from the server on a parallel thread.

  Arguments:
    channel: TaskChannel that receives back |digest| when download ends.
    priority: thread pool task priority for the fetch.
    digest: hex digest of an item to download.
    size: expected size of the item (after decompression).
    sink: function that will be called as sink(generator).
  """
  def fetch():
    """Streams the item into |sink| and returns |digest| on success."""
    try:
      # Reading pipeline: raw storage stream, optionally decompressed.
      chunks = self._storage_api.fetch(digest)
      if self._use_zip:
        chunks = zip_decompress(chunks, isolated_format.DISK_FILE_CHUNK)
      # The verifier asserts the stream size; the verified stream is what
      # |sink| consumes.
      sink(FetchStreamVerifier(chunks, size).run())
    except Exception as err:
      logging.error('Failed to fetch %s: %s', digest, err)
      raise
    return digest

  # Decompression is not moved to a separate thread pool: it is really fast
  # and most probably IO bound anyway.
  self.net_thread_pool.add_task_with_channel(channel, priority, fetch)
				613
def get_missing_items(self, items):
  """Generates the items that the server does not have yet.

  Issues multiple parallel queries via StorageApi's 'contains' method.

  Arguments:
    items: a list of Item objects to check.

  Yields:
    For each missing item it yields a pair (item, push_state), where:
      * item - Item object that is missing (one of |items|).
      * push_state - opaque object that contains storage specific information
          describing how to upload the item (for example in case of cloud
          storage, it is signed upload URLs). It can later be passed to
          'async_push'.
  """
  # Ensure all digests are calculated before any query is built.
  for item in items:
    item.prepare(self._hash_algo)

  def contains(batch):
    """Queries the storage API for one batch unless the run was aborted."""
    if self._aborted:
      raise Aborted()
    return self._storage_api.contains(batch)

  # Enqueue all requests, counting how many results have to be collected.
  channel = threading_utils.TaskChannel()
  pending = 0
  for batch in batch_items_for_check(items):
    self.net_thread_pool.add_task_with_channel(
        channel, threading_utils.PRIORITY_HIGH, contains, batch)
    pending += 1

  # Yield results as they come in, one pulled result per enqueued batch.
  while pending:
    pending -= 1
    for missing_item, push_state in channel.pull().iteritems():
      yield missing_item, push_state
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	652
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	653
def batch_items_for_check(items, batch_sizes=None):
  """Splits list of items to check for existence on the server into batches.

  Each batch corresponds to a single 'exists?' query to the server via a call
  to StorageApi's 'contains' method.

  Items are handed out ordered by decreasing size. The i-th batch holds at
  most batch_sizes[i] items; once the schedule is exhausted its last entry is
  reused for every further batch.

  Arguments:
    items: a list of Item objects.
    batch_sizes: optional non-empty sequence of per-batch size limits. When
        None (the default), ITEMS_PER_CONTAINS_QUERIES is used.

  Yields:
    Batches of items to query for existence in a single operation,
    each batch is a list of Item objects.
  """
  if batch_sizes is None:
    batch_sizes = ITEMS_PER_CONTAINS_QUERIES
  last_index = len(batch_sizes) - 1
  batch_count = 0
  batch_size_limit = batch_sizes[0]
  next_queries = []
  for item in sorted(items, key=lambda x: x.size, reverse=True):
    next_queries.append(item)
    # '>=' rather than '==' so that a non-positive limit in the schedule can
    # not silently produce a single unbounded batch.
    if len(next_queries) >= batch_size_limit:
      yield next_queries
      next_queries = []
      batch_count += 1
      batch_size_limit = batch_sizes[min(batch_count, last_index)]
  # Whatever did not fill a complete batch goes out as the final one.
  if next_queries:
    yield next_queries
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	680
				681
class FetchQueue(object):
  """Downloads items from Storage and stores them in LocalCache.

  It manages multiple concurrent fetch operations. Acts as a bridge between
  Storage and LocalCache so that Storage and LocalCache don't depend on each
  other at all.
  """

  def __init__(self, storage, cache):
    self.storage = storage
    self.cache = cache
    # Completed fetches report their digest through this channel.
    self._channel = threading_utils.TaskChannel()
    # Digests whose fetch was started and not yet collected by wait().
    self._pending = set()
    # Digests requested through add() and not already pending at that time.
    self._accessed = set()
    # Digests known to be in the cache; seeded from the cache itself.
    self._fetched = cache.cached_set()

  def add(
      self,
      digest,
      size=UNKNOWN_FILE_SIZE,
      priority=threading_utils.PRIORITY_MED):
    """Starts asynchronous fetch of item |digest|."""
    # Nothing to do when the item is being fetched right now.
    if digest in self._pending:
      return

    # Mark this file as in use, verify_all_cached will later ensure it is still
    # in cache.
    self._accessed.add(digest)

    if digest in self._fetched:
      # Already fetched: 'touch' updates the item's LRU position and returns
      # True if item is in cache and not corrupted.
      if self.cache.touch(digest, size):
        return
      # Item is corrupted, remove it from cache and fetch it again.
      self._fetched.remove(digest)
      self.cache.evict(digest)

    # TODO(maruel): It should look at the free disk space, the current cache
    # size and the size of the new item on every new item:
    # - Trim the cache as more entries are listed when free disk space is low,
    #   otherwise if the amount of data downloaded during the run > free disk
    #   space, it'll crash.
    # - Make sure there's enough free disk space to fit all dependencies of
    #   this run! If not, abort early.

    # Start fetching; the downloaded stream is written to the cache under
    # |digest|.
    self._pending.add(digest)
    write_to_cache = functools.partial(self.cache.write, digest)
    self.storage.async_fetch(
        self._channel, priority, digest, size, write_to_cache)

  def wait(self, digests):
    """Starts a loop that waits for at least one of |digests| to be retrieved.

    Returns the first digest retrieved.
    """
    # Flush any already fetched items.
    for candidate in digests:
      if candidate in self._fetched:
        return candidate

    # Ensure all requested items are being fetched now.
    assert all(digest in self._pending for digest in digests), (
        digests, self._pending)

    # Wait for some requested item to finish fetching. Every digest pulled
    # from the channel is recorded as fetched, requested or not.
    while self._pending:
      finished = self._channel.pull()
      self._pending.remove(finished)
      self._fetched.add(finished)
      if finished in digests:
        return finished

    # Should never reach this point due to assert above.
    raise RuntimeError('Impossible state')

  def inject_local_file(self, path, algo):
    """Adds local file to the cache as if it was fetched from storage."""
    with open(path, 'rb') as f:
      data = f.read()
    # The whole file content is hashed and written as a single chunk.
    digest = algo(data).hexdigest()
    self.cache.write(digest, [data])
    self._fetched.add(digest)
    return digest

  @property
  def pending_count(self):
    """Returns number of items to be fetched."""
    return len(self._pending)

  def verify_all_cached(self):
    """True if all accessed items are in cache."""
    currently_cached = self.cache.cached_set()
    return self._accessed.issubset(currently_cached)
				777
				778
				779	class FetchStreamVerifier(object):
				780	"""Verifies that fetched file is valid before passing it to the LocalCache."""
				781
				782	def __init__(self, stream, expected_size):
Marc-Antoine Ruel	df4976d	2015-04-15 19:56:21 -0400	[diff] [blame]	783	assert stream is not None
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	784	self.stream = stream
				785	self.expected_size = expected_size
				786	self.current_size = 0
				787
				788	def run(self):
				789	"""Generator that yields same items as \|stream\|.
				790
				791	Verifies \|stream\| is complete before yielding a last chunk to consumer.
				792
				793	Also wraps IOError produced by consumer into MappingError exceptions since
				794	otherwise Storage will retry fetch on unrelated local cache errors.
				795	"""
				796	# Read one chunk ahead, keep it in \|stored\|.
				797	# That way a complete stream can be verified before pushing last chunk
				798	# to consumer.
				799	stored = None
				800	for chunk in self.stream:
				801	assert chunk is not None
				802	if stored is not None:
				803	self._inspect_chunk(stored, is_last=False)
				804	try:
				805	yield stored
				806	except IOError as exc:
Marc-Antoine Ruel	52436aa	2014-08-28 21:57:57 -0400	[diff] [blame]	807	raise isolated_format.MappingError(
				808	'Failed to store an item in cache: %s' % exc)
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	809	stored = chunk
				810	if stored is not None:
				811	self._inspect_chunk(stored, is_last=True)
				812	try:
				813	yield stored
				814	except IOError as exc:
Marc-Antoine Ruel	52436aa	2014-08-28 21:57:57 -0400	[diff] [blame]	815	raise isolated_format.MappingError(
				816	'Failed to store an item in cache: %s' % exc)
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	817
				818	def _inspect_chunk(self, chunk, is_last):
				819	"""Called for each fetched chunk before passing it to consumer."""
				820	self.current_size += len(chunk)
Marc-Antoine Ruel	1e7658c	2014-08-28 19:46:39 -0400	[diff] [blame]	821	if (is_last and
Vadim Shtayura	3148e07	2014-09-02 18:51:52 -0700	[diff] [blame]	822	(self.expected_size != UNKNOWN_FILE_SIZE) and
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	823	(self.expected_size != self.current_size)):
				824	raise IOError('Incorrect file size: expected %d, got %d' % (
				825	self.expected_size, self.current_size))
				826
				827
class StorageApi(object):
  """Abstract base for classes that perform low-level storage operations.

  StorageApi is oblivious of compression and hashing scheme used. These
  details are handled in higher level Storage class.

  Clients should generally not use StorageApi directly. Storage class is
  preferred since it implements compression and upload optimizations.

  Every member below raises NotImplementedError and has to be provided by a
  subclass.
  """

  @property
  def location(self):
    """URL of the backing store that this class is using."""
    raise NotImplementedError()

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    raise NotImplementedError()

  def get_fetch_url(self, digest):
    """Returns an URL that can be used to fetch an item with given digest.

    Arguments:
      digest: hex digest of item to fetch.

    Returns:
      An URL or None if the protocol doesn't support this.
    """
    raise NotImplementedError()

  def fetch(self, digest, offset=0):
    """Fetches an object and yields its content.

    Arguments:
      digest: hash digest of item to download.
      offset: offset (in bytes) from the start of the file to resume fetch
          from.

    Yields:
      Chunks of downloaded item (as str objects).
    """
    raise NotImplementedError()

  def push(self, item, push_state, content=None):
    """Uploads an |item| with content generated by |content| generator.

    |item| MUST go through 'contains' call to get |push_state| before it can
    be pushed to the storage.

    To be clear, here is one possible usage:
      all_items = [... all items to push as Item subclasses ...]
      for missing_item, push_state in storage_api.contains(all_items).items():
        storage_api.push(missing_item, push_state)

    When pushing to a namespace with compression, data that should be pushed
    and data provided by the item is not the same. In that case |content| is
    not None and it yields chunks of compressed data (using item.content() as
    a source of original uncompressed data). This is implemented by Storage
    class.

    Arguments:
      item: Item object that holds information about an item being pushed.
      push_state: push state object as returned by 'contains' call.
      content: a generator that yields chunks to push, item.content() if None.

    Returns:
      None.
    """
    raise NotImplementedError()

  def contains(self, items):
    """Checks for |items| on the server, prepares missing ones for upload.

    Arguments:
      items: list of Item objects to check for presence.

    Returns:
      A dict missing Item -> opaque push state object to be passed to 'push'.
      See doc string for 'push'.
    """
    raise NotImplementedError()
				912
				913
class _IsolateServerPushState(object):
  """Per-item state passed from IsolateServer.contains to IsolateServer.push.

  Note this needs to be a global class to support pickling.
  """

  def __init__(self, preupload_status, size):
    """Derives the upload and finalize endpoints from |preupload_status|.

    Arguments:
      preupload_status: dict describing the pending upload; a non-empty
          'gs_upload_url' entry selects the upload-by-URL flow that needs a
          finalize call, otherwise the inline store endpoint is used.
      size: value stored as-is in the |size| attribute.
    """
    self.preupload_status = preupload_status
    gs_upload_url = preupload_status.get('gs_upload_url')
    uses_gs = bool(gs_upload_url)
    # Only uploads that go to |gs_upload_url| have to be finalized.
    self.upload_url = (
        gs_upload_url if uses_gs
        else '_ah/api/isolateservice/v1/store_inline')
    self.finalize_url = (
        '_ah/api/isolateservice/v1/finalize_gs_upload' if uses_gs else None)
    self.uploaded = False
    self.finalized = False
    self.size = size
Mike Frysinger	27f03da	2014-02-12 16:47:01 -0500	[diff] [blame]	932
				933
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	934	class IsolateServer(StorageApi):
vadimsh@chromium.org	bcb966b	2013-10-01 18:14:18 +0000	[diff] [blame]	935	"""StorageApi implementation that downloads and uploads to Isolate Server.
				936
				937	It uploads and downloads directly from Google Storage whenever appropriate.
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	938	Works only within single namespace.
vadimsh@chromium.org	bcb966b	2013-10-01 18:14:18 +0000	[diff] [blame]	939	"""
				940
def __init__(self, base_url, namespace):
  """Sets up the client for one server and one namespace.

  Arguments:
    base_url: URL of the server; trailing slashes are stripped.
    namespace: namespace name; a '-gzip' or '-flate' suffix marks the
        compression as 'flate' in the namespace description.
  """
  super(IsolateServer, self).__init__()
  assert file_path.is_url(base_url), base_url
  self._base_url = base_url.rstrip('/')
  self._namespace = namespace
  is_compressed = namespace.endswith(('-gzip', '-flate'))
  self._namespace_dict = {
      'compression': 'flate' if is_compressed else '',
      'digest_hash': 'sha-1',
      'namespace': namespace,
  }
  # Guards |_server_caps| and |_memory_use|.
  self._lock = threading.Lock()
  # Server details, fetched on first use by |_server_capabilities|.
  self._server_caps = None
  # Running total of push sizes accounted for by push().
  self._memory_use = 0
vadimsh@chromium.org	bcb966b	2013-10-01 18:14:18 +0000	[diff] [blame]	955
@property
def _server_capabilities(self):
  """Gets server details, requesting them on first access only.

  Returns:
    Server capabilities dictionary as returned by /server_details endpoint.
  """
  # TODO(maruel): Make this request much earlier asynchronously while the
  # files are being enumerated.

  # TODO(vadimsh): Put |namespace| in the URL so that server can apply
  # namespace-level ACLs to this call.

  with self._lock:
    # While no (non-None) result has been stored yet, the request is issued
    # again on every access.
    if self._server_caps is None:
      details_url = (
          '%s/_ah/api/isolateservice/v1/server_details' % self._base_url)
      self._server_caps = net.url_read_json(url=details_url, data={})
    return self._server_caps
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	975
@property
def location(self):
  """Base URL of the server, as stored by the constructor."""
  base_url = self._base_url
  return base_url
				979
				980	@property
				981	def namespace(self):
				982	return self._namespace
				983
def get_fetch_url(self, digest):
  """Returns the content-gs retrieve URL of |digest| in this namespace.

  Arguments:
    digest: digest of the item, as a string.
  """
  # (str, unicode) on Python 2, i.e. exactly what a basestring check accepts.
  assert isinstance(digest, (str, type(u'')))
  url_parts = (self._base_url, self._namespace, digest)
  return '%s/content-gs/retrieve/%s/%s' % url_parts
vadimsh@chromium.org	f24e5c3	2013-10-11 21:16:21 +0000	[diff] [blame]	988
def fetch(self, digest, offset=0):
  """Downloads an item and returns its content as an iterable of chunks.

  The retrieve endpoint is queried through self.do_fetch. Its response either
  carries the data inline (base64 encoded, under 'content') or a 'url' the
  data has to be streamed from.

  Arguments:
    digest: hash digest of item to download.
    offset: offset (in bytes) from the start of the file to resume fetch from.

  Returns:
    An iterable of chunks: a single-element list for inline content, or the
    result of stream_read() for content streamed from a URL.

  Raises:
    IOError: the response is empty, or |offset| was requested and the
        Content-Range header is missing, malformed, starts at another offset
        or does not cover the tail of the file.
  """
  assert offset >= 0
  source_url = '%s/_ah/api/isolateservice/v1/retrieve' % self._base_url
  logging.debug('download_file(%s, %d)', source_url, offset)
  response = self.do_fetch(source_url, digest, offset)

  if not response:
    raise IOError('Attempted to fetch from %s; no data exist.' % source_url)

  # for DB uploads
  content = response.get('content')
  if content is not None:
    # Hand back a one-chunk list rather than the bare string: callers iterate
    # over the result expecting chunks, and iterating a string would feed
    # them the item one character at a time.
    return [base64.b64decode(content)]

  # for GS entities
  connection = net.url_open(response['url'])

  # If |offset|, verify server respects it by checking Content-Range.
  if offset:
    content_range = connection.get_header('Content-Range')
    if not content_range:
      raise IOError('Missing Content-Range header')

    # 'Content-Range' format is 'bytes <offset>-<last_byte_index>/<size>'.
    # According to a spec, <size> can be '*' meaning "Total size of the file
    # is not known in advance".
    try:
      match = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', content_range)
      if not match:
        raise ValueError()
      content_offset = int(match.group(1))
      last_byte_index = int(match.group(2))
      size = None if match.group(3) == '*' else int(match.group(3))
    except ValueError:
      raise IOError('Invalid Content-Range header: %s' % content_range)

    # Ensure returned offset equals requested one.
    if offset != content_offset:
      raise IOError('Expecting offset %d, got %d (Content-Range is %s)' % (
          offset, content_offset, content_range))

    # Ensure entire tail of the file is returned.
    if size is not None and last_byte_index + 1 != size:
      raise IOError('Incomplete response. Content-Range: %s' % content_range)

  return stream_read(connection, NET_IO_FILE_CHUNK)
maruel@chromium.org	e45728d	2013-09-16 23:23:22 +0000	[diff] [blame]	1036
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	1037	def push(self, item, push_state, content=None):
vadimsh@chromium.org	bcb966b	2013-10-01 18:14:18 +0000	[diff] [blame]	1038	assert isinstance(item, Item)
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	1039	assert item.digest is not None
				1040	assert item.size is not None
				1041	assert isinstance(push_state, _IsolateServerPushState)
				1042	assert not push_state.finalized
				1043
				1044	# Default to item.content().
				1045	content = item.content() if content is None else content
Marc-Antoine Ruel	e98dde9	2015-01-22 14:53:05 -0500	[diff] [blame]	1046	logging.info('Push state size: %d', push_state.size)
				1047	if isinstance(content, (basestring, list)):
				1048	# Memory is already used, too late.
				1049	with self._lock:
				1050	self._memory_use += push_state.size
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	1051	else:
Marc-Antoine Ruel	e98dde9	2015-01-22 14:53:05 -0500	[diff] [blame]	1052	# TODO(vadimsh): Do not read from \|content\| generator when retrying push.
				1053	# If \|content\| is indeed a generator, it can not be re-winded back to the
				1054	# beginning of the stream. A retry will find it exhausted. A possible
				1055	# solution is to wrap \|content\| generator with some sort of caching
				1056	# restartable generator. It should be done alongside streaming support
				1057	# implementation.
				1058	#
				1059	# In theory, we should keep the generator, so that it is not serialized in
				1060	# memory. Sadly net.HttpService.request() requires the body to be
				1061	# serialized.
				1062	assert isinstance(content, types.GeneratorType), repr(content)
				1063	slept = False
				1064	# HACK HACK HACK. Please forgive me for my sins but OMG, it works!
Marc-Antoine Ruel	e6677c8	2015-02-05 14:54:22 -0500	[diff] [blame]	1065	# One byte less than 512mb. This is to cope with incompressible content.
				1066	max_size = int(sys.maxsize * 0.25)
Marc-Antoine Ruel	e98dde9	2015-01-22 14:53:05 -0500	[diff] [blame]	1067	while True:
				1068	with self._lock:
				1069	# This is due to 32 bits python when uploading very large files. The
				1070	# problem is that it's comparing uncompressed sizes, while we care
				1071	# about compressed sizes since it's what is serialized in memory.
				1072	# The first check assumes large files are compressible and that by
				1073	# throttling one upload at once, we can survive. Otherwise, kaboom.
				1074	memory_use = self._memory_use
				1075	if ((push_state.size >= max_size and not memory_use) or
				1076	(memory_use + push_state.size <= max_size)):
				1077	self._memory_use += push_state.size
				1078	memory_use = self._memory_use
				1079	break
				1080	time.sleep(0.1)
				1081	slept = True
				1082	if slept:
				1083	logging.info('Unblocked: %d %d', memory_use, push_state.size)
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	1084
Marc-Antoine Ruel	e98dde9	2015-01-22 14:53:05 -0500	[diff] [blame]	1085	try:
				1086	# This push operation may be a retry after failed finalization call below,
				1087	# no need to reupload contents in that case.
				1088	if not push_state.uploaded:
				1089	# PUT file to \|upload_url\|.
Cory Massaro	cc19c8c	2015-03-10 13:35:11 -0700	[diff] [blame]	1090	success = self.do_push(push_state, content)
Marc-Antoine Ruel	e98dde9	2015-01-22 14:53:05 -0500	[diff] [blame]	1091	if not success:
Cory Massaro	cc19c8c	2015-03-10 13:35:11 -0700	[diff] [blame]	1092	raise IOError('Failed to upload file with hash %s to URL %s' % (
Marc-Antoine Ruel	e98dde9	2015-01-22 14:53:05 -0500	[diff] [blame]	1093	item.digest, push_state.upload_url))
				1094	push_state.uploaded = True
				1095	else:
				1096	logging.info(
				1097	'A file %s already uploaded, retrying finalization only',
				1098	item.digest)
				1099
				1100	# Optionally notify the server that it's done.
				1101	if push_state.finalize_url:
				1102	# TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
				1103	# send it to isolated server. That way isolate server can verify that
				1104	# the data safely reached Google Storage (GS provides MD5 and CRC32C of
				1105	# stored files).
				1106	# TODO(maruel): Fix the server to accept properly data={} so
				1107	# url_read_json() can be used.
Cory Massaro	cc19c8c	2015-03-10 13:35:11 -0700	[diff] [blame]	1108	response = net.url_read_json(
				1109	url='%s/%s' % (self._base_url, push_state.finalize_url),
				1110	data={
				1111	'upload_ticket': push_state.preupload_status['upload_ticket'],
				1112	})
				1113	if not response or not response['ok']:
				1114	raise IOError('Failed to finalize file with hash %s.' % item.digest)
Marc-Antoine Ruel	e98dde9	2015-01-22 14:53:05 -0500	[diff] [blame]	1115	push_state.finalized = True
				1116	finally:
				1117	with self._lock:
				1118	self._memory_use -= push_state.size
maruel@chromium.org	d1e20c9	2013-09-17 20:54:26 +0000	[diff] [blame]	1119
def contains(self, items):
  """Queries the server to find out which of |items| still need uploading.

  One preupload request covering every item is sent to the isolate service.
  Each entry the server lists in the 'items' field of its reply is treated as
  a missing item and gets a push state attached to it.

  Arguments:
    items: indexable sequence of items; every item must already have its
        |digest| and |size| set.

  Returns:
    dict mapping each missing item to its _IsolateServerPushState.

  Raises:
    isolated_format.MappingError: no response was obtained, or a ValueError
        was raised while the response was being read.
  """
  # Storage runs 'prepare' on every item before getting here, so both fields
  # are expected to be populated.
  assert all(i.digest is not None and i.size is not None for i in items)

  # The request body is a dict: one small dict per item plus the namespace.
  entries = [
    {
      'digest': item.digest,
      'is_isolated': bool(item.high_priority),
      'size': item.size,
    }
    for item in items
  ]
  request = {
    'items': entries,
    'namespace': self._namespace_dict,
  }
  preupload_url = '%s/_ah/api/isolateservice/v1/preupload' % self._base_url

  # |reply| is pre-initialized so the error message below can always print it.
  reply = None
  try:
    reply = net.url_read_json(url=preupload_url, data=request)
    if reply is None:
      raise isolated_format.MappingError('Failed to execute preupload query')
  except ValueError as err:
    raise isolated_format.MappingError(
        'Invalid response from server: %s, body is %s' % (err, reply))

  # Only the items listed in the reply are missing; 'index' points back into
  # |items|.
  push_states = {}
  for status in reply.get('items', []):
    assert 'upload_ticket' in status, (
        status, '/preupload did not generate an upload ticket')
    missing = items[int(status['index'])]
    push_states[missing] = _IsolateServerPushState(status, missing.size)

  total = len(items)
  logging.info(
      'Queried %d files, %d cache hit', total, total - len(push_states))
  return push_states
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	1160
def do_fetch(self, url, digest, offset):
  """Requests isolated data from |url|.

  Meant for retrieving file contents rather than for generic API calls.
  Subclasses may override it.

  Args:
    url: URL to fetch the data from, can possibly return http redirect.
    digest: hash digest of the item to fetch; it is utf-8 encoded before being
        put in the request.
    offset: byte offset inside the file to start fetching from; must be an
        int.

  Returns:
    Whatever net.url_read_json() returns for this request.
  """
  assert isinstance(offset, int)
  # The request carries the item identity, the namespace and the resume
  # position.
  payload = dict(
      digest=digest.encode('utf-8'),
      namespace=self._namespace_dict,
      offset=offset)
  return net.url_read_json(
      url=url, data=payload, read_timeout=DOWNLOAD_READ_TIMEOUT)
Vadim Shtayura	8623c27	2014-12-01 11:45:27 -0800	[diff] [blame]	1184
def do_push(self, push_state, content):
  """Uploads the content of one isolated file.

  Used only for storing files, not for API calls. Can be overridden in
  subclasses.

  Two upload paths exist, selected by |push_state.finalize_url|:
    - no finalize URL: the content is base64 encoded and posted as JSON to
      |push_state.upload_url| relative to the server base URL ("DB upload");
    - finalize URL present: the raw content is PUT to |push_state.upload_url|
      as is ("upload to GS").

  Args:
    push_state: an _IsolateServerPushState instance describing where to upload.
    content: either a single 'str', or an iterable that yields 'str' chunks.

  Returns:
    A true value if the upload succeeded, a false value otherwise. For the DB
    path this is the 'ok' field of the server response.
  """
  # A cheezy way to avoid memcpy of (possibly huge) file, until streaming
  # upload support is implemented. A plain string is used as is: joining it
  # would rebuild it character by character for no benefit.
  if not isinstance(content, basestring):
    if isinstance(content, list) and len(content) == 1:
      content = content[0]
    else:
      content = ''.join(content)

  # DB upload
  if not push_state.finalize_url:
    url = '%s/%s' % (self._base_url, push_state.upload_url)
    content = base64.b64encode(content)
    data = {
      'upload_ticket': push_state.preupload_status['upload_ticket'],
      'content': content,
    }
    response = net.url_read_json(url=url, data=data)
    return response is not None and response['ok']

  # upload to GS
  url = push_state.upload_url
  response = net.url_read(
      content_type='application/octet-stream',
      data=content,
      method='PUT',
      url=url)
  return response is not None
				1223
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	1224
class LocalCache(object):
  """Interface of a local cache holding objects fetched via Storage.

  Several threads may use the same instance at once, so implementations are
  expected to guard their internal state with a lock.
  """
  # Directory backing the cache; stays None unless an implementation sets it.
  cache_dir = None

  def __enter__(self):
    """Enters the context; the cache itself is the context object."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Leaves the context; returns False so exceptions are not suppressed."""
    return False

  def cached_set(self):
    """Returns a brand new set with the digests of all cached items."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Checks that an item is intact and refreshes its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Drops the item from the cache when it is present."""
    raise NotImplementedError()

  def read(self, digest):
    """Returns the whole content of a cached item as one str."""
    raise NotImplementedError()

  def write(self, digest, content):
    """Consumes the |content| generator and stores the data in the cache."""
    raise NotImplementedError()

  def hardlink(self, digest, dest, file_mode):
    """Makes the file at |dest| hold the same content as cached |digest|.

    When file_mode is provided, it is used to set the executable bit if
    applicable.
    """
    raise NotImplementedError()
				1276
				1277
				1278	class MemoryCache(LocalCache):
				1279	"""LocalCache implementation that stores everything in memory."""
				1280
Vadim Shtayura	e3fbd10	2014-04-29 17:05:21 -0700	[diff] [blame]	1281	def __init__(self, file_mode_mask=0500):
				1282	"""Args:
				1283	file_mode_mask: bit mask to AND file mode with. Default value will make
				1284	all mapped files to be read only.
				1285	"""
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1286	super(MemoryCache, self).__init__()
Vadim Shtayura	e3fbd10	2014-04-29 17:05:21 -0700	[diff] [blame]	1287	self._file_mode_mask = file_mode_mask
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1288	# Let's not assume dict is thread safe.
				1289	self._lock = threading.Lock()
				1290	self._contents = {}
				1291
				1292	def cached_set(self):
				1293	with self._lock:
				1294	return set(self._contents)
				1295
				1296	def touch(self, digest, size):
				1297	with self._lock:
				1298	return digest in self._contents
				1299
				1300	def evict(self, digest):
				1301	with self._lock:
				1302	self._contents.pop(digest, None)
				1303
				1304	def read(self, digest):
				1305	with self._lock:
				1306	return self._contents[digest]
				1307
				1308	def write(self, digest, content):
				1309	# Assemble whole stream before taking the lock.
				1310	data = ''.join(content)
				1311	with self._lock:
				1312	self._contents[digest] = data
				1313
Marc-Antoine Ruel	fb199cf	2013-11-12 15:38:12 -0500	[diff] [blame]	1314	def hardlink(self, digest, dest, file_mode):
				1315	"""Since data is kept in memory, there is no filenode to hardlink."""
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1316	file_write(dest, [self.read(digest)])
Marc-Antoine Ruel	fb199cf	2013-11-12 15:38:12 -0500	[diff] [blame]	1317	if file_mode is not None:
Vadim Shtayura	e3fbd10	2014-04-29 17:05:21 -0700	[diff] [blame]	1318	os.chmod(dest, file_mode & self._file_mode_mask)
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1319
				1320
class CachePolicies(object):
  """Plain holder for the three cache retention limits."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """Stores the limits as is, without validating them.

    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
				1335
				1336
				1337	class DiskCache(LocalCache):
				1338	"""Stateful LRU cache in a flat hash table in a directory.
				1339
				1340	Saves its state as json file.
				1341	"""
				1342	STATE_FILE = 'state.json'
				1343
				1344	def __init__(self, cache_dir, policies, hash_algo):
				1345	"""
				1346	Arguments:
				1347	cache_dir: directory where to place the cache.
				1348	policies: cache retention policies.
				1349	algo: hashing algorithm used.
				1350	"""
				1351	super(DiskCache, self).__init__()
				1352	self.cache_dir = cache_dir
				1353	self.policies = policies
				1354	self.hash_algo = hash_algo
				1355	self.state_file = os.path.join(cache_dir, self.STATE_FILE)
				1356
				1357	# All protected methods (starting with '_') except _path should be called
				1358	# with this lock locked.
				1359	self._lock = threading_utils.LockWithAssert()
				1360	self._lru = lru.LRUDict()
				1361
				1362	# Profiling values.
				1363	self._added = []
				1364	self._removed = []
				1365	self._free_disk = 0
				1366
				1367	with tools.Profiler('Setup'):
				1368	with self._lock:
				1369	self._load()
				1370
				1371	def __enter__(self):
				1372	return self
				1373
				1374	def __exit__(self, _exc_type, _exec_value, _traceback):
				1375	with tools.Profiler('CleanupTrimming'):
				1376	with self._lock:
				1377	self._trim()
				1378
				1379	logging.info(
				1380	'%5d (%8dkb) added',
				1381	len(self._added), sum(self._added) / 1024)
				1382	logging.info(
				1383	'%5d (%8dkb) current',
				1384	len(self._lru),
				1385	sum(self._lru.itervalues()) / 1024)
				1386	logging.info(
				1387	'%5d (%8dkb) removed',
				1388	len(self._removed), sum(self._removed) / 1024)
				1389	logging.info(
				1390	' %8dkb free',
				1391	self._free_disk / 1024)
				1392	return False
				1393
				1394	def cached_set(self):
				1395	with self._lock:
				1396	return self._lru.keys_set()
				1397
				1398	def touch(self, digest, size):
				1399	"""Verifies an actual file is valid.
				1400
				1401	Note that is doesn't compute the hash so it could still be corrupted if the
				1402	file size didn't change.
				1403
				1404	TODO(maruel): More stringent verification while keeping the check fast.
				1405	"""
				1406	# Do the check outside the lock.
				1407	if not is_valid_file(self._path(digest), size):
				1408	return False
				1409
				1410	# Update it's LRU position.
				1411	with self._lock:
				1412	if digest not in self._lru:
				1413	return False
				1414	self._lru.touch(digest)
				1415	return True
				1416
				1417	def evict(self, digest):
				1418	with self._lock:
				1419	self._lru.pop(digest)
				1420	self._delete_file(digest, UNKNOWN_FILE_SIZE)
				1421
				1422	def read(self, digest):
				1423	with open(self._path(digest), 'rb') as f:
				1424	return f.read()
				1425
				1426	def write(self, digest, content):
Marc-Antoine Ruel	df4976d	2015-04-15 19:56:21 -0400	[diff] [blame]	1427	assert content is not None
Marc-Antoine Ruel	e4ad07e	2014-10-15 20:22:29 -0400	[diff] [blame]	1428	path = self._path(digest)
				1429	# A stale broken file may remain. It is possible for the file to have write
				1430	# access bit removed which would cause the file_write() call to fail to open
				1431	# in write mode. Take no chance here.
				1432	file_path.try_remove(path)
				1433	try:
				1434	size = file_write(path, content)
				1435	except:
				1436	# There are two possible places were an exception can occur:
				1437	# 1) Inside \|content\| generator in case of network or unzipping errors.
				1438	# 2) Inside file_write itself in case of disk IO errors.
				1439	# In any case delete an incomplete file and propagate the exception to
				1440	# caller, it will be logged there.
				1441	file_path.try_remove(path)
				1442	raise
				1443	# Make the file read-only in the cache. This has a few side-effects since
				1444	# the file node is modified, so every directory entries to this file becomes
				1445	# read-only. It's fine here because it is a new file.
				1446	file_path.set_read_only(path, True)
				1447	with self._lock:
				1448	self._add(digest, size)
				1449
				1450	def hardlink(self, digest, dest, file_mode):
				1451	"""Hardlinks the file to \|dest\|.
				1452
				1453	Note that the file permission bits are on the file node, not the directory
				1454	entry, so changing the access bit on any of the directory entries for the
				1455	file node will affect them all.
				1456	"""
				1457	path = self._path(digest)
				1458	# TODO(maruel): file_path.HARDLINK_WITH_FALLBACK ?
				1459	file_path.hardlink(path, dest)
				1460	if file_mode is not None:
				1461	# Ignores all other bits.
				1462	os.chmod(dest, file_mode & 0500)
				1463
				1464	def _load(self):
				1465	"""Loads state of the cache from json file."""
				1466	self._lock.assert_locked()
				1467
				1468	if not os.path.isdir(self.cache_dir):
				1469	os.makedirs(self.cache_dir)
				1470	else:
				1471	# Make sure the cache is read-only.
				1472	# TODO(maruel): Calculate the cost and optimize the performance
				1473	# accordingly.
				1474	file_path.make_tree_read_only(self.cache_dir)
				1475
				1476	# Load state of the cache.
				1477	if os.path.isfile(self.state_file):
				1478	try:
				1479	self._lru = lru.LRUDict.load(self.state_file)
				1480	except ValueError as err:
				1481	logging.error('Failed to load cache state: %s' % (err,))
				1482	# Don't want to keep broken state file.
				1483	file_path.try_remove(self.state_file)
				1484
				1485	# Ensure that all files listed in the state still exist and add new ones.
				1486	previous = self._lru.keys_set()
				1487	unknown = []
				1488	for filename in os.listdir(self.cache_dir):
				1489	if filename == self.STATE_FILE:
				1490	continue
				1491	if filename in previous:
				1492	previous.remove(filename)
				1493	continue
				1494	# An untracked file.
				1495	if not isolated_format.is_valid_hash(filename, self.hash_algo):
				1496	logging.warning('Removing unknown file %s from cache', filename)
Marc-Antoine Ruel	8cd3337	2015-02-09 12:54:43 -0500	[diff] [blame]	1497	p = self._path(filename)
				1498	if os.path.isdir(p):
				1499	try:
				1500	file_path.rmtree(p)
				1501	except OSError:
				1502	pass
				1503	else:
				1504	file_path.try_remove(p)
Marc-Antoine Ruel	e4ad07e	2014-10-15 20:22:29 -0400	[diff] [blame]	1505	continue
				1506	# File that's not referenced in 'state.json'.
				1507	# TODO(vadimsh): Verify its SHA1 matches file name.
				1508	logging.warning('Adding unknown file %s to cache', filename)
				1509	unknown.append(filename)
				1510
				1511	if unknown:
				1512	# Add as oldest files. They will be deleted eventually if not accessed.
				1513	self._add_oldest_list(unknown)
				1514	logging.warning('Added back %d unknown files', len(unknown))
				1515
				1516	if previous:
				1517	# Filter out entries that were not found.
				1518	logging.warning('Removed %d lost files', len(previous))
				1519	for filename in previous:
				1520	self._lru.pop(filename)
				1521	self._trim()
				1522
				1523	def _save(self):
				1524	"""Saves the LRU ordering."""
				1525	self._lock.assert_locked()
				1526	if sys.platform != 'win32':
				1527	d = os.path.dirname(self.state_file)
				1528	if os.path.isdir(d):
				1529	# Necessary otherwise the file can't be created.
				1530	file_path.set_read_only(d, False)
				1531	if os.path.isfile(self.state_file):
				1532	file_path.set_read_only(self.state_file, False)
				1533	self._lru.save(self.state_file)
				1534
				1535	def _trim(self):
				1536	"""Trims anything we don't know, make sure enough free space exists."""
				1537	self._lock.assert_locked()
				1538
				1539	# Ensure maximum cache size.
				1540	if self.policies.max_cache_size:
				1541	total_size = sum(self._lru.itervalues())
				1542	while total_size > self.policies.max_cache_size:
				1543	total_size -= self._remove_lru_file()
				1544
				1545	# Ensure maximum number of items in the cache.
				1546	if self.policies.max_items and len(self._lru) > self.policies.max_items:
				1547	for _ in xrange(len(self._lru) - self.policies.max_items):
				1548	self._remove_lru_file()
				1549
				1550	# Ensure enough free space.
				1551	self._free_disk = file_path.get_free_space(self.cache_dir)
				1552	trimmed_due_to_space = False
				1553	while (
				1554	self.policies.min_free_space and
				1555	self._lru and
				1556	self._free_disk < self.policies.min_free_space):
				1557	trimmed_due_to_space = True
				1558	self._remove_lru_file()
				1559	self._free_disk = file_path.get_free_space(self.cache_dir)
				1560	if trimmed_due_to_space:
				1561	total_usage = sum(self._lru.itervalues())
				1562	usage_percent = 0.
				1563	if total_usage:
				1564	usage_percent = 100. * self.policies.max_cache_size / float(total_usage)
				1565	logging.warning(
				1566	'Trimmed due to not enough free disk space: %.1fkb free, %.1fkb '
				1567	'cache (%.1f%% of its maximum capacity)',
				1568	self._free_disk / 1024.,
				1569	total_usage / 1024.,
				1570	usage_percent)
				1571	self._save()
				1572
				1573	def _path(self, digest):
				1574	"""Returns the path to one item."""
				1575	return os.path.join(self.cache_dir, digest)
				1576
				1577	def _remove_lru_file(self):
				1578	"""Removes the last recently used file and returns its size."""
				1579	self._lock.assert_locked()
				1580	digest, size = self._lru.pop_oldest()
				1581	self._delete_file(digest, size)
				1582	return size
				1583
				1584	def _add(self, digest, size=UNKNOWN_FILE_SIZE):
				1585	"""Adds an item into LRU cache marking it as a newest one."""
				1586	self._lock.assert_locked()
				1587	if size == UNKNOWN_FILE_SIZE:
				1588	size = os.stat(self._path(digest)).st_size
				1589	self._added.append(size)
				1590	self._lru.add(digest, size)
				1591
				1592	def _add_oldest_list(self, digests):
				1593	"""Adds a bunch of items into LRU cache marking them as oldest ones."""
				1594	self._lock.assert_locked()
				1595	pairs = []
				1596	for digest in digests:
				1597	size = os.stat(self._path(digest)).st_size
				1598	self._added.append(size)
				1599	pairs.append((digest, size))
				1600	self._lru.batch_insert_oldest(pairs)
				1601
				1602	def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
				1603	"""Deletes cache file from the file system."""
				1604	self._lock.assert_locked()
				1605	try:
				1606	if size == UNKNOWN_FILE_SIZE:
				1607	size = os.stat(self._path(digest)).st_size
				1608	file_path.try_remove(self._path(digest))
				1609	self._removed.append(size)
				1610	except OSError as e:
				1611	logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
				1612
				1613
class IsolatedBundle(object):
  """A .isolated file, fetched and parsed along with everything it includes."""

  def __init__(self):
    # Properties merged from the loaded *.isolated files.
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def fetch(self, fetch_queue, root_isolated_hash, algo):
    """Retrieves the root .isolated file and every .isolated it includes.

    "Included" .isolated files are supported. They are processed in strict
    order but fetched asynchronously from the cache. The strict order matters:
    a file from an included .isolated that is overridden by the embedding
    .isolated must not be fetched needlessly. The includes are fetched in one
    pass, and the data files of a given .isolated are requested as soon as all
    the .isolated files on its left side in the tree were fetched.

    Prioritization is key for nested .isolated files. 'includes' get the
    highest priority and the algorithm copes with both deep and wide trees. A
    deep tree is a long chain of .isolated files, each one referenced by a
    single item of 'includes'. A wide tree has many 'includes' in a single
    .isolated file. 'left' designates an included .isolated file that comes
    earlier in the 'includes' list, so the order of that list is significant.

    As a side effect, the asynchronous fetch of all data files is started by
    adding them to |fetch_queue|. This method does not wait for the data files
    to be fetched.
    """
    self.root = isolated_format.IsolatedFile(root_isolated_hash, algo)

    # hash -> IsolatedFile instance, for the *.isolated files in flight.
    in_flight = {}
    # Hashes already requested once; used to refuse recursive includes.
    requested = set()
    # IsolatedFile's whose data files were already queued for fetching.
    handled = set()

    def enqueue(isolated_file):
      digest = isolated_file.obj_hash
      if digest in requested:
        raise isolated_format.IsolatedError(
            'IsolatedFile %s is retrieved recursively' % digest)
      assert digest not in in_flight
      requested.add(digest)
      in_flight[digest] = isolated_file
      fetch_queue.add(digest, priority=threading_utils.PRIORITY_HIGH)

    # Kick off with the root *.isolated file alone, not the whole bundle.
    enqueue(self.root)

    while in_flight:
      # Block until one of the *.isolated files is available, then parse it.
      ready_hash = fetch_queue.wait(in_flight)
      ready = in_flight.pop(ready_hash)
      ready.load(fetch_queue.cache.read(ready_hash))

      # Request the *.isolated files it includes.
      for child in ready.children:
        enqueue(child)

      # Data files are always requested in traversal order. When the next node
      # to handle is not loaded yet, give control back to the outer loop, which
      # waits for some *.isolated file to be loaded.
      for node in isolated_format.walk_includes(self.root):
        if node in handled:
          continue
        if not node.is_loaded:
          # Not handled and not loaded: it has to be waited for.
          break
        # Not handled but loaded: queue its files and keep walking.
        self._start_fetching_files(node, fetch_queue)
        handled.add(node)

    # By now every *.isolated file must have been handled, and nothing else.
    every_isolated = set(isolated_format.walk_includes(self.root))
    assert every_isolated == handled, (every_isolated, handled)

    # Collect 'command' and the other bundle wide properties.
    for node in isolated_format.walk_includes(self.root):
      self._update_self(node)
    self.relative_cwd = self.relative_cwd or ''

  def _start_fetching_files(self, isolated, fetch_queue):
    """Queues the files of |isolated| that were not requested yet.

    Modifies self.files.
    """
    logging.debug('fetch_files(%s)', isolated.obj_hash)
    for filepath, properties in isolated.data.get('files', {}).iteritems():
      # The first .isolated to map a path wins, so the root isolated has
      # priority. In particular, overridden files must not be fetched.
      if filepath in self.files:
        continue
      self.files[filepath] = properties
      if 'h' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s', filepath)
      fetch_queue.add(
          properties['h'], properties['s'], threading_utils.PRIORITY_MED)

  def _update_self(self, node):
    """Picks the bundle wide properties out of one loaded *.isolated file.

    It is called once per loaded *.isolated file, following the traversal
    order of the include graph (see isolated_format.walk_includes). A property
    that is already set is never overwritten.
    """
    data = node.data
    if not self.command and data.get('command'):
      self.command = data['command']
      if self.command:
        # Ensure paths are correctly separated on windows.
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)

    if self.read_only is None:
      read_only = data.get('read_only')
      if read_only is not None:
        self.read_only = read_only

    if self.relative_cwd is None:
      relative_cwd = data.get('relative_cwd')
      if relative_cwd is not None:
        self.relative_cwd = relative_cwd
				1735
				1736
def set_storage_api_class(cls):
  """Installs |cls| as the StorageApi implementation to use by default.

  Assertions enforce that no replacement was installed before and that |cls|
  is a subclass of StorageApi.
  """
  global _storage_api_cls
  # Only one override is accepted.
  assert _storage_api_cls is None
  assert issubclass(cls, StorageApi)
  _storage_api_cls = cls
				1743
				1744
def get_storage_api(url, namespace):
  """Creates the object implementing the low-level StorageApi interface.

  Storage uses it to work with a single isolate |namespace|. Clients should
  rarely need it directly; 'get_storage' is the better alternative.

  Arguments:
    url: URL of isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of StorageApi subclass.
  """
  # A class installed via set_storage_api_class() wins over the default one.
  if _storage_api_cls:
    implementation = _storage_api_cls
  else:
    implementation = IsolateServer
  return implementation(url, namespace)
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	1763
				1764
def get_storage(url, namespace):
  """Builds a Storage object able to upload to and download from |namespace|.

  Arguments:
    url: URL of isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of Storage.
  """
  storage_api = get_storage_api(url, namespace)
  return Storage(storage_api)
maruel@chromium.org	dedbf49	2013-09-12 20:42:11 +0000	[diff] [blame]	1778
maruel@chromium.org	dedbf49	2013-09-12 20:42:11 +0000	[diff] [blame]	1779
def upload_tree(base_url, infiles, namespace):
  """Uploads the given tree to the given url.

  Arguments:
    base_url: The url of the isolate server to upload to.
    infiles: iterable of pairs (absolute path, metadata dict) of files.
    namespace: The namespace to use on the server.
  """
  # Turn |infiles| into FileItem objects, one per distinct path. Entries with
  # an 'l' key (symlinks) are dropped since they are not represented by items
  # on isolate server side. Both kinds of dropped entries are counted together.
  to_upload = []
  visited = set()
  skipped = 0
  for filepath, metadata in infiles:
    if 'l' in metadata or filepath in visited:
      skipped += 1
      continue
    visited.add(filepath)
    to_upload.append(
        FileItem(
            path=filepath,
            digest=metadata['h'],
            size=metadata['s'],
            high_priority=metadata.get('priority') == '0'))

  logging.info('Skipped %d duplicated entries', skipped)
  with get_storage(base_url, namespace) as storage:
    storage.upload_items(to_upload)
Vadim Shtayura	3148e07	2014-09-02 18:51:52 -0700	[diff] [blame]	1809
				1810
def fetch_isolated(isolated_hash, storage, cache, outdir, require_command):
  """Aggressively downloads the .isolated file(s), then download all the files.

  Arguments:
    isolated_hash: hash of the root *.isolated file. If it is not a valid hash
        for the storage's algorithm, it is treated as a path to a local file
        that is injected into the cache.
    storage: Storage class that communicates with isolate storage.
    cache: LocalCache class that knows how to store and map files locally.
    outdir: Output directory to map file tree to.
    require_command: Ensure *.isolated specifies a command to run.

  Returns:
    IsolatedBundle object that holds details about loaded *.isolated file.

  Raises:
    isolated_format.MappingError: |isolated_hash| is neither a valid hash nor
        a readable local file, or the cache could not hold every fetched item.
    isolated_format.IsolatedError: |require_command| is set but no command was
        found.
  """
  logging.debug(
      'fetch_isolated(%s, %s, %s, %s, %s)',
      isolated_hash, storage, cache, outdir, require_command)
  # Hash algorithm to use, defined by namespace |storage| is using.
  algo = storage.hash_algo
  with cache:
    fetch_queue = FetchQueue(storage, cache)
    bundle = IsolatedBundle()

    with tools.Profiler('GetIsolateds'):
      # Optionally support local files by manually adding them to cache.
      if not isolated_format.is_valid_hash(isolated_hash, algo):
        logging.debug('%s is not a valid hash, assuming a file', isolated_hash)
        try:
          isolated_hash = fetch_queue.inject_local_file(isolated_hash, algo)
        except IOError:
          raise isolated_format.MappingError(
              '%s doesn\'t seem to be a valid file. Did you intend to pass a '
              'valid hash?' % isolated_hash)

      # Load all *.isolated and start loading rest of the files.
      bundle.fetch(fetch_queue, isolated_hash, algo)
      if require_command and not bundle.command:
        # TODO(vadimsh): All fetch operations are already enqueue and there's no
        # easy way to cancel them.
        raise isolated_format.IsolatedError('No command to run')

    with tools.Profiler('GetRest'):
      # Create file system hierarchy.
      if not os.path.isdir(outdir):
        os.makedirs(outdir)
      create_directories(outdir, bundle.files)
      create_symlinks(outdir, bundle.files.iteritems())

      # Ensure working directory exists.
      cwd = os.path.normpath(os.path.join(outdir, bundle.relative_cwd))
      if not os.path.isdir(cwd):
        os.makedirs(cwd)

      # Multimap: digest -> list of pairs (path, props).
      remaining = {}
      for filepath, props in bundle.files.iteritems():
        if 'h' in props:
          remaining.setdefault(props['h'], []).append((filepath, props))

      # Now block on the remaining files to be downloaded and mapped.
      logging.info('Retrieving remaining files (%d of them)...',
          fetch_queue.pending_count)
      last_update = time.time()
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        while remaining:
          detector.ping()

          # Wait for any item to finish fetching to cache.
          digest = fetch_queue.wait(remaining)

          # Link corresponding files to a fetched item in cache.
          for filepath, props in remaining.pop(digest):
            cache.hardlink(
                digest, os.path.join(outdir, filepath), props.get('m'))

          # Report progress.
          duration = time.time() - last_update
          if duration > DELAY_BETWEEN_UPDATES_IN_SECS:
            msg = '%d files remaining...' % len(remaining)
            # Parenthesized single-argument form: same output as the print
            # statement, and consistent with the other print() calls here.
            print(msg)
            logging.info(msg)
            last_update = time.time()

    # Cache could evict some items we just tried to fetch, it's a fatal error.
    if not fetch_queue.verify_all_cached():
      raise isolated_format.MappingError(
          'Cache is too small to hold all requested files')
  return bundle
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	1898
				1899
def directory_to_metadata(root, algo, blacklist):
  """Returns the FileItem list and .isolated metadata for a directory.

  Arguments:
    root: directory to scan; it is converted to its native path case first.
    algo: hash algorithm forwarded to isolated_format.file_to_metadata.
    blacklist: filter forwarded to isolated_format.expand_directory_and_symlink.

  Returns:
    Pair (items, metadata): |metadata| maps each relative path to its metadata
    dict with the 't' entry removed; |items| holds one FileItem per entry that
    has an 'h' key.
  """
  root = file_path.get_native_path_case(root)
  follow_symlinks = sys.platform != 'win32'
  relpaths = isolated_format.expand_directory_and_symlink(
      root, '.' + os.path.sep, blacklist, follow_symlinks)

  metadata = {}
  for relpath in relpaths:
    metadata[relpath] = isolated_format.file_to_metadata(
        os.path.join(root, relpath), {}, 0, algo)

  # The 't' entry is dropped from every metadata dict.
  for entry in metadata.itervalues():
    entry.pop('t')

  items = []
  for relpath, entry in metadata.iteritems():
    if 'h' not in entry:
      continue
    items.append(
        FileItem(
            path=os.path.join(root, relpath),
            digest=entry['h'],
            size=entry['s'],
            high_priority=relpath.endswith('.isolated')))
  return items, metadata
				1921
				1922
def archive_files_to_storage(storage, files, blacklist):
  """Uploads every entry of |files| and reports the resulting hashes.

  Arguments:
    storage: a Storage object that communicates with the remote object store.
    files: list of file paths to upload. If a directory is specified, a
           .isolated file is created and its hash is returned.
    blacklist: function that returns True if a file should be omitted.

  Returns:
    List of pairs (hash, path as given in |files|), in the order of |files|.

  Raises:
    Error: two entries resolve to the same absolute path, an entry is neither
        a file nor a directory, or an OSError occurred while processing one.
  """
  assert all(isinstance(i, unicode) for i in files), files
  if len(set(map(os.path.abspath, files))) != len(files):
    raise Error('Duplicate entries found.')

  results = []
  # The temporary directory is only created as needed.
  tempdir = None
  try:
    # TODO(maruel): Yield the files to a worker thread.
    pending_items = []
    for f in files:
      try:
        filepath = os.path.abspath(f)
        is_dir = os.path.isdir(filepath)
        if not is_dir and not os.path.isfile(filepath):
          raise Error('%s is neither a file or directory.' % f)

        if is_dir:
          # Uploading a whole directory.
          dir_items, metadata = directory_to_metadata(
              filepath, storage.hash_algo, blacklist)

          # Create the .isolated file.
          if not tempdir:
            tempdir = tempfile.mkdtemp(prefix=u'isolateserver')
          handle, isolated = tempfile.mkstemp(dir=tempdir, suffix=u'.isolated')
          os.close(handle)
          isolated_format.save_isolated(
              isolated,
              {
                'algo':
                    isolated_format.SUPPORTED_ALGOS_REVERSE[storage.hash_algo],
                'files': metadata,
                'version': isolated_format.ISOLATED_FILE_VERSION,
              })
          digest = isolated_format.hash_file(isolated, storage.hash_algo)
          pending_items.extend(dir_items)
          # The generated .isolated is uploaded with high priority.
          pending_items.append(
              FileItem(
                  path=isolated,
                  digest=digest,
                  size=os.stat(isolated).st_size,
                  high_priority=True))
        else:
          # Uploading a single file.
          digest = isolated_format.hash_file(filepath, storage.hash_algo)
          pending_items.append(
              FileItem(
                  path=filepath,
                  digest=digest,
                  size=os.stat(filepath).st_size,
                  high_priority=f.endswith('.isolated')))
        results.append((digest, f))
      except OSError:
        raise Error('Failed to process %s.' % f)
    # Technically we would care about which files were uploaded but we don't
    # much in practice.
    storage.upload_items(pending_items)
    return results
  finally:
    if tempdir and os.path.isdir(tempdir):
      file_path.rmtree(tempdir)
Marc-Antoine Ruel	fcc3cd8	2013-11-19 16:31:38 -0500	[diff] [blame]	1992
				1993
def archive(out, namespace, files, blacklist):
  """Archives |files| to the isolate server and prints one line per entry.

  Arguments:
    out: URL of the isolate server to upload to.
    namespace: isolate namespace to upload into.
    files: list of paths (byte strings, UTF-8 encoded) to archive; the single
        entry '-' means "read the paths from stdin, one per line".
    blacklist: list of regexps, converted with tools.gen_blacklist, used to
        filter files when uploading directories.

  Raises:
    Error: there is nothing to upload (other errors come from
        archive_files_to_storage).
  """
  if files == ['-']:
    # readlines() keeps each line's terminator. Strip it, otherwise every path
    # ends with a newline and is rejected as neither a file nor a directory.
    # Blank lines are skipped.
    stripped = (line.rstrip('\r\n') for line in sys.stdin.readlines())
    files = [line for line in stripped if line]

  if not files:
    raise Error('Nothing to upload')

  files = [f.decode('utf-8') for f in files]
  blacklist = tools.gen_blacklist(blacklist)
  with get_storage(out, namespace) as storage:
    results = archive_files_to_storage(storage, files, blacklist)
  print('\n'.join('%s %s' % (r[0], r[1]) for r in results))
				2006
				2007
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  # Register the options this command understands, then parse.
  add_isolate_server_options(parser)
  add_archive_options(parser)
  options, files = parser.parse_args(args)
  process_isolate_server_options(parser, options, True)

  server = options.isolate_server
  namespace = options.namespace
  try:
    archive(server, namespace, files, options.blacklist)
  except Error as failure:
    # Report archival failures through the parser's error mechanism.
    parser.error(failure.args[0])
  return 0
maruel@chromium.org	fb78d43	2013-08-28 21:22:40 +0000	[diff] [blame]	2029
				2030
				2031	def CMDdownload(parser, args):
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2032	"""Download data from the server.
				2033
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	2034	It can either download individual files or a complete tree from a .isolated
				2035	file.
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2036	"""
Marc-Antoine Ruel	f7d737d	2014-12-10 15:36:29 -0500	[diff] [blame]	2037	add_isolate_server_options(parser)
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2038	parser.add_option(
Marc-Antoine Ruel	185ded4	2015-01-28 20:49:18 -0500	[diff] [blame]	2039	'-s', '--isolated', metavar='HASH',
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	2040	help='hash of an isolated file, .isolated file content is discarded, use '
				2041	'--file if you need it')
				2042	parser.add_option(
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2043	'-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
				2044	help='hash and destination of a file, can be used multiple times')
				2045	parser.add_option(
Marc-Antoine Ruel	f90861c	2015-03-24 20:54:49 -0400	[diff] [blame]	2046	'-t', '--target', metavar='DIR', default='download',
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2047	help='destination directory')
Marc-Antoine Ruel	a57d7db	2014-10-15 20:31:19 -0400	[diff] [blame]	2048	add_cache_options(parser)
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2049	options, args = parser.parse_args(args)
				2050	if args:
				2051	parser.error('Unsupported arguments: %s' % args)
Marc-Antoine Ruel	f7d737d	2014-12-10 15:36:29 -0500	[diff] [blame]	2052
Marc-Antoine Ruel	e290ada	2014-12-10 19:48:49 -0500	[diff] [blame]	2053	process_isolate_server_options(parser, options, True)
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	2054	if bool(options.isolated) == bool(options.file):
				2055	parser.error('Use one of --isolated or --file, and only one.')
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2056
Marc-Antoine Ruel	a57d7db	2014-10-15 20:31:19 -0400	[diff] [blame]	2057	cache = process_cache_options(options)
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2058	options.target = os.path.abspath(options.target)
Marc-Antoine Ruel	f90861c	2015-03-24 20:54:49 -0400	[diff] [blame]	2059	if options.isolated:
				2060	if (os.path.isfile(options.target) or
				2061	(os.path.isdir(options.target) and os.listdir(options.target))):
				2062	parser.error(
				2063	'--target \'%s\' exists, please use another target' % options.target)
Marc-Antoine Ruel	f7d737d	2014-12-10 15:36:29 -0500	[diff] [blame]	2064	with get_storage(options.isolate_server, options.namespace) as storage:
Vadim Shtayura	3172be5	2013-12-03 12:49:05 -0800	[diff] [blame]	2065	# Fetching individual files.
				2066	if options.file:
Marc-Antoine Ruel	a57d7db	2014-10-15 20:31:19 -0400	[diff] [blame]	2067	# TODO(maruel): Enable cache in this case too.
Vadim Shtayura	3172be5	2013-12-03 12:49:05 -0800	[diff] [blame]	2068	channel = threading_utils.TaskChannel()
				2069	pending = {}
				2070	for digest, dest in options.file:
				2071	pending[digest] = dest
				2072	storage.async_fetch(
				2073	channel,
Vadim Shtayura	3148e07	2014-09-02 18:51:52 -0700	[diff] [blame]	2074	threading_utils.PRIORITY_MED,
Vadim Shtayura	3172be5	2013-12-03 12:49:05 -0800	[diff] [blame]	2075	digest,
Vadim Shtayura	3148e07	2014-09-02 18:51:52 -0700	[diff] [blame]	2076	UNKNOWN_FILE_SIZE,
Vadim Shtayura	3172be5	2013-12-03 12:49:05 -0800	[diff] [blame]	2077	functools.partial(file_write, os.path.join(options.target, dest)))
				2078	while pending:
				2079	fetched = channel.pull()
				2080	dest = pending.pop(fetched)
				2081	logging.info('%s: %s', fetched, dest)
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	2082
Vadim Shtayura	3172be5	2013-12-03 12:49:05 -0800	[diff] [blame]	2083	# Fetching whole isolated tree.
				2084	if options.isolated:
Marc-Antoine Ruel	a57d7db	2014-10-15 20:31:19 -0400	[diff] [blame]	2085	with cache:
				2086	bundle = fetch_isolated(
				2087	isolated_hash=options.isolated,
				2088	storage=storage,
				2089	cache=cache,
				2090	outdir=options.target,
				2091	require_command=False)
				2092	if bundle.command:
				2093	rel = os.path.join(options.target, bundle.relative_cwd)
				2094	print('To run this test please run from the directory %s:' %
				2095	os.path.join(options.target, rel))
				2096	print(' ' + ' '.join(bundle.command))
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	2097
maruel@chromium.org	fb78d43	2013-08-28 21:22:40 +0000	[diff] [blame]	2098	return 0
				2099
				2100
def add_archive_options(parser):
  """Adds the --blacklist option to |parser|.

  The option is repeatable; its default is a fresh copy of DEFAULT_BLACKLIST.
  """
  blacklist_help = (
      'List of regexp to use as blacklist filter when uploading '
      'directories')
  # list() copies the default so the 'append' action works on its own list.
  parser.add_option(
      '--blacklist',
      action='append',
      default=list(DEFAULT_BLACKLIST),
      help=blacklist_help)
				2107
				2108
def add_isolate_server_options(parser):
  """Adds --isolate-server and --namespace options to parser."""
  # The server default is read from the environment at registration time.
  default_server = os.environ.get('ISOLATE_SERVER', '')
  server_help = (
      'URL of the Isolate Server to use. Defaults to the environment '
      'variable ISOLATE_SERVER if set. No need to specify https://, this '
      'is assumed.')
  namespace_help = (
      'The namespace to use on the Isolate Server, default: %default')

  parser.add_option(
      '-I', '--isolate-server',
      metavar='URL',
      default=default_server,
      help=server_help)
  parser.add_option('--namespace', default='default-gzip', help=namespace_help)
				2120
				2121
def process_isolate_server_options(parser, options, set_exception_handler):
  """Processes the --isolate-server option and aborts if not specified.

  The URL stored in |options.isolate_server| is replaced by the result of
  net.fix_url. When |set_exception_handler| is true,
  on_error.report_on_exception_exit is called with that URL. Every failure is
  reported through parser.error.

  Returns the identity as determined by the server.
  """
  if not options.isolate_server:
    parser.error('--isolate-server is required.')

  # Normalize the URL in place on |options|.
  try:
    options.isolate_server = net.fix_url(options.isolate_server)
  except ValueError as url_error:
    parser.error('--isolate-server %s' % url_error)

  if set_exception_handler:
    on_error.report_on_exception_exit(options.isolate_server)

  try:
    return auth.ensure_logged_in(options.isolate_server)
  except ValueError as login_error:
    parser.error(str(login_error))
Marc-Antoine Ruel	8806e62	2014-02-12 14:15:53 -0500	[diff] [blame]	2139
Marc-Antoine Ruel	1687b5e	2014-02-06 17:47:53 -0500	[diff] [blame]	2140
def add_cache_options(parser):
  """Adds the 'Cache management' option group to |parser|.

  Options added: --cache (directory, no default), --max-cache-size (default
  20 GiB), --min-free-space (default 2 GiB) and --max-items (default 100000).
  """
  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache', metavar='DIR',
      help='Directory to keep a local cache of the files. Accelerates download '
           'by reusing already downloaded files. Default=%default')
  cache_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      # 20 GiB. The separators between the factors had been lost, leaving a
      # single meaningless literal.
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  cache_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      # 2 GiB; same repair as above.
      default=2*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  cache_group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(cache_group)
				2168
				2169
				2170	def process_cache_options(options):
				2171	if options.cache:
				2172	policies = CachePolicies(
				2173	options.max_cache_size, options.min_free_space, options.max_items)
				2174
				2175	# \|options.cache\| path may not exist until DiskCache() instance is created.
				2176	return DiskCache(
Marc-Antoine Ruel	3c979cb	2015-03-11 13:43:28 -0400	[diff] [blame]	2177	unicode(os.path.abspath(options.cache)),
Marc-Antoine Ruel	a57d7db	2014-10-15 20:31:19 -0400	[diff] [blame]	2178	policies,
				2179	isolated_format.get_hash_algo(options.namespace))
				2180	else:
				2181	return MemoryCache()
				2182
				2183
class OptionParserIsolateServer(logging_utils.OptionParserWithLogging):
  """Option parser for this tool: logging options plus auth options.

  Sets the version and program name, registers the auth options on
  construction and processes them after every parse.
  """

  def __init__(self, **kwargs):
    logging_utils.OptionParserWithLogging.__init__(
        self,
        version=__version__,
        prog=os.path.basename(sys.modules[__name__].__file__),
        **kwargs)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    """Parses arguments, then lets auth process the resulting options.

    Accepts and forwards whatever positional and keyword arguments the base
    class parse_args() takes, so calls without arguments or with keyword
    arguments work too.

    Returns:
      Pair (options, args) as returned by the base class.
    """
    options, args = logging_utils.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    auth.process_auth_options(self, options)
    return options, args
				2198
				2199
				2200	def main(args):
				2201	dispatcher = subcommand.CommandDispatcher(__name__)
Marc-Antoine Ruel	cfb6085	2014-07-02 15:22:00 -0400	[diff] [blame]	2202	return dispatcher.execute(OptionParserIsolateServer(), args)
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	2203
				2204
# Script entry point: one-time process setup, then run the command line and
# exit with main()'s return value.
if __name__ == '__main__':
  # NOTE(review): presumably normalizes the process' default encoding --
  # confirm in fix_encoding.
  fix_encoding.fix_encoding()
  # NOTE(review): presumably makes stdout unbuffered -- confirm in tools.
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))