blob: 889a55ec55a0d06836d1483129339cdd94d0e267 [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00005
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05006"""Archives a set of files or directories to a server."""
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00007
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -04008__version__ = '0.3.4'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +000010import functools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000011import hashlib
maruel@chromium.org41601642013-09-18 19:40:46 +000012import json
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000013import logging
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000014import os
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000015import re
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050016import shutil
17import stat
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000018import sys
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050019import tempfile
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000020import threading
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000021import time
maruel@chromium.orge82112e2013-04-24 14:41:55 +000022import urllib
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -050023import urlparse
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000024import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000025
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000026from third_party import colorama
27from third_party.depot_tools import fix_encoding
28from third_party.depot_tools import subcommand
29
Marc-Antoine Ruel37989932013-11-19 16:28:08 -050030from utils import file_path
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000031from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040032from utils import on_error
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000033from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000034from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000035
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080036import auth
37
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000038
# Version of isolate protocol passed to the server in /handshake request.
ISOLATE_PROTOCOL_VERSION = '1.0'
# Version stored and expected in .isolated files.
ISOLATED_FILE_VERSION = '1.4'


# The number of files to check the isolate server per /pre-upload query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: larger the file -> larger the
# possibility it has changed). Then first ITEMS_PER_CONTAINS_QUERIES[0] files
# are taken and send to '/pre-upload', then next ITEMS_PER_CONTAINS_QUERIES[1],
# and so on. Numbers here is a trade-off; the more per request, the lower the
# effect of HTTP round trip latency and TCP-level chattiness. On the other hand,
# larger values cause longer lookups, increasing the initial latency to start
# uploading, which is especially an issue for large files. This value is
# optimized for the "few thousands files to look up with minimal number of large
# files missing" case.
ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]


# A list of already compressed extension types that should not receive any
# compression before being uploaded.
# NOTE(review): these are bare extensions without a leading dot; callers that
# compare against os.path.splitext() output must strip the dot first.
ALREADY_COMPRESSED_TYPES = [
    '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'mp4', 'pdf',
    'png', 'wav', 'zip',
]


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# Chunk size to use when doing disk I/O (1 MiB).
DISK_FILE_CHUNK = 1024 * 1024

# Chunk size to use when reading from network stream (16 KiB).
NET_IO_FILE_CHUNK = 16 * 1024


# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout whole download will be aborted.
DOWNLOAD_READ_TIMEOUT = 60

# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30


# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
# specify the names here.
SUPPORTED_ALGOS = {
  'md5': hashlib.md5,
  'sha-1': hashlib.sha1,
  'sha-512': hashlib.sha512,
}


# Used for serialization: maps a hashlib constructor back to its wire name.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.iteritems())


# Regexps of files never archived by default.
DEFAULT_BLACKLIST = (
  # Temporary vim or python files.
  r'^.+\.(?:pyc|swp)$',
  # .git or .svn directory.
  r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
)


# Chromium-specific.
DEFAULT_BLACKLIST += (
  r'^.+\.(?:run_test_cases)$',
  r'^(?:.+' + re.escape(os.path.sep) + r'|)testserver\.log$',
)
121
122
class Error(Exception):
  """Generic runtime error raised by this script."""
126
127
class ConfigError(ValueError):
  """Generic failure to load a .isolated file."""
132
class MappingError(OSError):
  """Failed to recreate the tree."""
137
def is_valid_hash(value, algo):
  """Returns if the value is a valid hash for the corresponding algorithm."""
  # A valid digest is exactly 2 hex characters per digest byte.
  expected_len = 2 * algo().digest_size
  pattern = r'^[a-fA-F0-9]{%d}$' % expected_len
  return bool(re.match(pattern, value))
142
143
def hash_file(filepath, algo):
  """Calculates the hash of a file without reading it all in memory at once.

  |algo| should be one of hashlib hashing algorithm.
  """
  hasher = algo()
  with open(filepath, 'rb') as stream:
    # Feed the hasher one disk-sized chunk at a time to bound memory usage.
    chunk = stream.read(DISK_FILE_CHUNK)
    while chunk:
      hasher.update(chunk)
      chunk = stream.read(DISK_FILE_CHUNK)
  return hasher.hexdigest()
157
158
def stream_read(stream, chunk_size):
  """Reads chunks from |stream| and yields them."""
  # Stop as soon as the stream reports EOF (an empty read).
  data = stream.read(chunk_size)
  while data:
    yield data
    data = stream.read(chunk_size)
166
167
def file_read(filepath, chunk_size=DISK_FILE_CHUNK, offset=0):
  """Yields file content in chunks of |chunk_size| starting from |offset|."""
  with open(filepath, 'rb') as stream:
    # Skip straight to the requested position before streaming.
    if offset:
      stream.seek(offset)
    data = stream.read(chunk_size)
    while data:
      yield data
      data = stream.read(chunk_size)
178
179
def file_write(filepath, content_generator):
  """Writes file content as generated by content_generator.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  # Make sure the parent directory exists before opening the file.
  parent = os.path.dirname(filepath)
  if not os.path.isdir(parent):
    os.makedirs(parent)
  written = 0
  with open(filepath, 'wb') as out:
    for chunk in content_generator:
      written += len(chunk)
      out.write(chunk)
  return written
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000198
199
def zip_compress(content_generator, level=7):
  """Reads chunks from |content_generator| and yields zip compressed chunks."""
  compressor = zlib.compressobj(level)
  for piece in content_generator:
    # compress() may buffer internally and return nothing for small inputs.
    out = compressor.compress(piece)
    if out:
      yield out
  # Emit whatever zlib still holds plus the stream terminator.
  remainder = compressor.flush(zlib.Z_FINISH)
  if remainder:
    yield remainder
210
211
def zip_decompress(content_generator, chunk_size=DISK_FILE_CHUNK):
  """Reads zipped data from |content_generator| and yields decompressed data.

  Decompresses data in small chunks (no larger than |chunk_size|) so that
  zip bomb file doesn't cause zlib to preallocate huge amount of memory.

  Raises IOError if data is corrupted or incomplete.
  """
  decompressor = zlib.decompressobj()
  compressed_size = 0
  try:
    for chunk in content_generator:
      compressed_size += len(chunk)
      # Second argument caps the output size; any input that would produce
      # more output is left in decompressor.unconsumed_tail.
      data = decompressor.decompress(chunk, chunk_size)
      if data:
        yield data
      # Drain the leftover input in |chunk_size|-bounded increments.
      while decompressor.unconsumed_tail:
        data = decompressor.decompress(decompressor.unconsumed_tail, chunk_size)
        if data:
          yield data
    # Emit whatever zlib still buffers internally at end of stream.
    tail = decompressor.flush()
    if tail:
      yield tail
  except zlib.error as e:
    raise IOError(
        'Corrupted zip stream (read %d bytes) - %s' % (compressed_size, e))
  # Ensure all data was read and decompressed.
  if decompressor.unused_data or decompressor.unconsumed_tail:
    raise IOError('Not all data was decompressed')
241
242
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (store, no compression) for file types that are already
  compressed, 7 otherwise.
  """
  # BUG FIX: os.path.splitext() keeps the leading dot ('.zip') while
  # ALREADY_COMPRESSED_TYPES lists bare extensions ('zip'), so the previous
  # membership test never matched and already-compressed files were
  # recompressed at level 7. Strip the dot before comparing.
  file_ext = os.path.splitext(filename)[1].lower().lstrip('.')
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
248
249
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files."""
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect every ancestor directory of every file, walking up to the root.
  directories = set()
  for filepath in files:
    ancestor = os.path.dirname(filepath)
    while ancestor and ancestor not in directories:
      directories.add(ancestor)
      ancestor = os.path.dirname(ancestor)
  # Sorting guarantees parents are created before their children.
  for directory in sorted(directories):
    os.mkdir(os.path.join(base_directory, directory))
262
263
def create_symlinks(base_directory, files):
  """Creates any symlinks needed by the given set of files."""
  on_windows = sys.platform == 'win32'
  for relpath, properties in files:
    # Only entries carrying an 'l' (link target) property are symlinks.
    if 'l' not in properties:
      continue
    if on_windows:
      # TODO(maruel): Create symlink via the win32 api.
      logging.warning('Ignoring symlink %s', relpath)
      continue
    destination = os.path.join(base_directory, relpath)
    # os.symlink() doesn't exist on Windows.
    os.symlink(properties['l'], destination)  # pylint: disable=E1101
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000276
277
def is_valid_file(filepath, size):
  """Determines if the given files appears valid.

  Currently it just checks the file's size.
  """
  # When the size is unknown, existence is the only check we can do.
  if size == UNKNOWN_FILE_SIZE:
    return os.path.isfile(filepath)
  actual_size = os.stat(filepath).st_size
  if size == actual_size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), actual_size, size)
  return False
292
293
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """Thread pool that automatically retries on IOError and runs a preconfigured
  function.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Number of times a task is retried (on IOError) before giving up.
  RETRIES = 5

  def __init__(self):
    # Positional arguments are: exceptions to retry on, retry count, initial
    # workers, max workers, queue size, thread name prefix -- assumed from
    # AutoRetryThreadPool; TODO confirm against utils/threading_utils.
    super(WorkerPool, self).__init__(
        [IOError],
        self.RETRIES,
        self.INITIAL_WORKERS,
        self.MAX_WORKERS,
        0,
        'remote')
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000311
312
class Item(object):
  """An item to push to Storage.

  Its digest and size may be provided in advance, if known. Otherwise they will
  be derived from content(). If digest is provided, it MUST correspond to
  hash algorithm used by Storage.

  When used with Storage, Item starts its life in a main thread, travels
  to 'contains' thread, then to 'push' thread and then finally back to
  the main thread. It is never used concurrently from multiple threads.
  """

  def __init__(self, digest=None, size=None, high_priority=False):
    self.digest = digest
    self.size = size
    self.high_priority = high_priority
    self.compression_level = 6

  def content(self):
    """Iterable with content of this item as byte string (str) chunks."""
    raise NotImplementedError()

  def prepare(self, hash_algo):
    """Ensures self.digest and self.size are set.

    Uses content() as a source of data to calculate them. Does nothing if digest
    and size is already known.

    Arguments:
      hash_algo: hash algorithm to use to calculate digest.
    """
    # Nothing to do when both values were supplied upfront.
    if self.digest is not None and self.size is not None:
      return
    hasher = hash_algo()
    size = 0
    # Single pass over the content computes both digest and total size.
    for chunk in self.content():
      hasher.update(chunk)
      size += len(chunk)
    self.digest = hasher.hexdigest()
    self.size = size
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000352
353
class FileItem(Item):
  """A file to push to Storage.

  Its digest and size may be provided in advance, if known. Otherwise they will
  be derived from the file content.
  """

  def __init__(self, path, digest=None, size=None, high_priority=False):
    # Fall back to the on-disk size when the caller doesn't know it.
    actual_size = os.stat(path).st_size if size is None else size
    super(FileItem, self).__init__(digest, actual_size, high_priority)
    self.path = path
    self.compression_level = get_zip_compression_level(path)

  def content(self):
    # Stream the file from disk lazily, one chunk at a time.
    return file_read(self.path)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000371
372
class BufferItem(Item):
  """A byte buffer to push to Storage."""

  def __init__(self, buf, high_priority=False):
    # Size is known upfront (length of the buffer); digest is left to be
    # computed lazily by Item.prepare().
    super(BufferItem, self).__init__(None, len(buf), high_priority)
    self.buffer = buf

  def content(self):
    # The whole buffer is already in memory: return it as a single chunk.
    return [self.buffer]
382
383
class Storage(object):
  """Efficiently downloads or uploads large set of files via StorageApi.

  Implements compression support, parallel 'contains' checks, parallel uploads
  and more.

  Works only within single namespace (and thus hashing algorithm and compression
  scheme are fixed).

  Spawns multiple internal threads. Thread safe, but not fork safe.
  """

  def __init__(self, storage_api):
    # Low-level transport (e.g. IsolateServer or FileSystem implementation).
    self._storage_api = storage_api
    # Whether content must be zipped before upload, derived from the namespace.
    self._use_zip = is_namespace_with_compression(storage_api.namespace)
    # Hash constructor used to derive item digests, derived from the namespace.
    self._hash_algo = get_hash_algo(storage_api.namespace)
    # Thread pools are created lazily by the properties below.
    self._cpu_thread_pool = None
    self._net_thread_pool = None

  @property
  def hash_algo(self):
    """Hashing algorithm used to name files in storage based on their content.

    Defined by |namespace|. See also 'get_hash_algo'.
    """
    return self._hash_algo

  @property
  def location(self):
    """Location of a backing store that this class is using.

    Exact meaning depends on the storage_api type. For IsolateServer it is
    an URL of isolate server, for FileSystem is it a path in file system.
    """
    return self._storage_api.location

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    return self._storage_api.namespace

  @property
  def cpu_thread_pool(self):
    """ThreadPool for CPU-bound tasks like zipping. Created on first use."""
    if self._cpu_thread_pool is None:
      self._cpu_thread_pool = threading_utils.ThreadPool(
          2, max(threading_utils.num_processors(), 2), 0, 'zip')
    return self._cpu_thread_pool

  @property
  def net_thread_pool(self):
    """AutoRetryThreadPool for IO-bound tasks, retries IOError."""
    if self._net_thread_pool is None:
      self._net_thread_pool = WorkerPool()
    return self._net_thread_pool

  def close(self):
    """Waits for all pending tasks to finish."""
    # join() waits for queued work to drain before close() tears threads down.
    if self._cpu_thread_pool:
      self._cpu_thread_pool.join()
      self._cpu_thread_pool.close()
      self._cpu_thread_pool = None
    if self._net_thread_pool:
      self._net_thread_pool.join()
      self._net_thread_pool.close()
      self._net_thread_pool = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exc_value, _traceback):
    """Context manager interface."""
    self.close()
    # Returning False propagates any exception raised inside the 'with' block.
    return False

  def upload_items(self, items):
    """Uploads a bunch of items to the isolate server.

    It figures out what items are missing from the server and uploads only them.

    Arguments:
      items: list of Item instances that represents data to upload.

    Returns:
      List of items that were uploaded. All other items are already there.
    """
    # TODO(vadimsh): Optimize special case of len(items) == 1 that is frequently
    # used by swarming.py. There's no need to spawn multiple threads and try to
    # do stuff in parallel: there's nothing to parallelize. 'contains' check and
    # 'push' should be performed sequentially in the context of current thread.

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # For each digest keep only first Item that matches it. All other items
    # are just indistinguishable copies from the point of view of isolate
    # server (it doesn't care about paths at all, only content and digests).
    seen = {}
    duplicates = 0
    for item in items:
      if seen.setdefault(item.digest, item) is not item:
        duplicates += 1
    items = seen.values()
    if duplicates:
      logging.info('Skipped %d duplicated files', duplicates)

    # Enqueue all upload tasks.
    missing = set()
    uploaded = []
    channel = threading_utils.TaskChannel()
    for missing_item, push_state in self.get_missing_items(items):
      missing.add(missing_item)
      self.async_push(channel, missing_item, push_state)

    # No need to spawn deadlock detector thread if there's nothing to upload.
    if missing:
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        # Wait for all started uploads to finish.
        while len(uploaded) != len(missing):
          detector.ping()
          item = channel.pull()
          uploaded.append(item)
          logging.debug(
              'Uploaded %d / %d: %s', len(uploaded), len(missing), item.digest)
      logging.info('All files are uploaded')

    # Print stats.
    total = len(items)
    total_size = sum(f.size for f in items)
    logging.info(
        'Total: %6d, %9.1fkb',
        total,
        total_size / 1024.)
    cache_hit = set(items) - missing
    cache_hit_size = sum(f.size for f in cache_hit)
    logging.info(
        'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_hit),
        cache_hit_size / 1024.,
        len(cache_hit) * 100. / total,
        cache_hit_size * 100. / total_size if total_size else 0)
    cache_miss = missing
    cache_miss_size = sum(f.size for f in cache_miss)
    logging.info(
        'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_miss),
        cache_miss_size / 1024.,
        len(cache_miss) * 100. / total,
        cache_miss_size * 100. / total_size if total_size else 0)

    return uploaded

  def get_fetch_url(self, item):
    """Returns an URL that can be used to fetch given item once it's uploaded.

    Note that if namespace uses compression, data at given URL is compressed.

    Arguments:
      item: Item to get fetch URL for.

    Returns:
      An URL or None if underlying protocol doesn't support this.
    """
    item.prepare(self._hash_algo)
    return self._storage_api.get_fetch_url(item.digest)

  def async_push(self, channel, item, push_state):
    """Starts asynchronous push to the server in a parallel thread.

    Can be used only after |item| was checked for presence on a server with
    'get_missing_items' call. 'get_missing_items' returns |push_state| object
    that contains storage specific information describing how to upload
    the item (for example in case of cloud storage, it is signed upload URLs).

    Arguments:
      channel: TaskChannel that receives back |item| when upload ends.
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      None, but |channel| later receives back |item| when upload ends.
    """
    # Thread pool task priority.
    priority = WorkerPool.HIGH if item.high_priority else WorkerPool.MED

    def push(content):
      """Pushes an Item and returns it to |channel|."""
      item.prepare(self._hash_algo)
      self._storage_api.push(item, push_state, content)
      return item

    # If zipping is not required, just start a push task.
    if not self._use_zip:
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, item.content())
      return

    # If zipping is enabled, zip in a separate thread.
    def zip_and_push():
      # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
      # content right here. It will block until all file is zipped.
      try:
        stream = zip_compress(item.content(), item.compression_level)
        data = ''.join(stream)
      except Exception as exc:
        logging.error('Failed to zip \'%s\': %s', item, exc)
        # Propagate the failure to whoever is pulling from |channel|.
        channel.send_exception()
        return
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, [data])
    self.cpu_thread_pool.add_task(priority, zip_and_push)

  def push(self, item, push_state):
    """Synchronously pushes a single item to the server.

    If you need to push many items at once, consider using 'upload_items' or
    'async_push' with instance of TaskChannel.

    Arguments:
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      Pushed item (same object as |item|).
    """
    channel = threading_utils.TaskChannel()
    with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT):
      self.async_push(channel, item, push_state)
      # Block until the upload thread reports completion back on the channel.
      pushed = channel.pull()
      assert pushed is item
    return item

  def async_fetch(self, channel, priority, digest, size, sink):
    """Starts asynchronous fetch from the server in a parallel thread.

    Arguments:
      channel: TaskChannel that receives back |digest| when download ends.
      priority: thread pool task priority for the fetch.
      digest: hex digest of an item to download.
      size: expected size of the item (after decompression).
      sink: function that will be called as sink(generator).
    """
    def fetch():
      try:
        # Prepare reading pipeline.
        stream = self._storage_api.fetch(digest)
        if self._use_zip:
          stream = zip_decompress(stream, DISK_FILE_CHUNK)
        # Run |stream| through verifier that will assert its size.
        verifier = FetchStreamVerifier(stream, size)
        # Verified stream goes to |sink|.
        sink(verifier.run())
      except Exception as err:
        logging.error('Failed to fetch %s: %s', digest, err)
        raise
      return digest

    # Don't bother with zip_thread_pool for decompression. Decompression is
    # really fast and most probably IO bound anyway.
    self.net_thread_pool.add_task_with_channel(channel, priority, fetch)

  def get_missing_items(self, items):
    """Yields items that are missing from the server.

    Issues multiple parallel queries via StorageApi's 'contains' method.

    Arguments:
      items: a list of Item objects to check.

    Yields:
      For each missing item it yields a pair (item, push_state), where:
        * item - Item object that is missing (one of |items|).
        * push_state - opaque object that contains storage specific information
            describing how to upload the item (for example in case of cloud
            storage, it is signed upload URLs). It can later be passed to
            'async_push'.
    """
    channel = threading_utils.TaskChannel()
    pending = 0

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # Enqueue all requests.
    for batch in batch_items_for_check(items):
      self.net_thread_pool.add_task_with_channel(channel, WorkerPool.HIGH,
          self._storage_api.contains, batch)
      pending += 1

    # Yield results as they come in.
    for _ in xrange(pending):
      # Each pulled result is a dict of {missing Item: push_state}.
      for missing_item, push_state in channel.pull().iteritems():
        yield missing_item, push_state
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000683
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000684
def batch_items_for_check(items, batch_size_limits=None):
  """Splits list of items to check for existence on the server into batches.

  Each batch corresponds to a single 'exists?' query to the server via a call
  to StorageApi's 'contains' method.

  Items are visited sorted by size, largest first. The i-th batch contains
  batch_size_limits[i] items; once the schedule is exhausted, the last entry
  is reused for all subsequent batches.

  Arguments:
    items: a list of Item objects.
    batch_size_limits: optional sequence of per-batch size limits; defaults to
        the module-level ITEMS_PER_CONTAINS_QUERIES schedule. Passing it
        explicitly makes the function independent of module configuration
        (useful for testing and reuse).

  Yields:
    Batches of items to query for existence in a single operation,
    each batch is a list of Item objects.
  """
  # Resolve the default lazily so the module constant is only required when
  # the caller does not supply an explicit schedule.
  if batch_size_limits is None:
    batch_size_limits = ITEMS_PER_CONTAINS_QUERIES
  batch_count = 0
  batch_size_limit = batch_size_limits[0]
  next_queries = []
  for item in sorted(items, key=lambda x: x.size, reverse=True):
    next_queries.append(item)
    if len(next_queries) == batch_size_limit:
      yield next_queries
      next_queries = []
      batch_count += 1
      # Clamp the index so the last configured limit repeats forever.
      batch_size_limit = batch_size_limits[
          min(batch_count, len(batch_size_limits) - 1)]
  # Flush the final partial batch, if any.
  if next_queries:
    yield next_queries
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000711
712
class FetchQueue(object):
  """Fetches items from Storage and places them into LocalCache.

  It manages multiple concurrent fetch operations. Acts as a bridge between
  Storage and LocalCache so that Storage and LocalCache don't depend on each
  other at all.
  """

  def __init__(self, storage, cache):
    self.storage = storage
    self.cache = cache
    # Completed downloads are reported back on this channel.
    self._channel = threading_utils.TaskChannel()
    # Digests with a download currently in flight.
    self._pending = set()
    # Every digest ever requested through 'add'; checked by verify_all_cached.
    self._accessed = set()
    # Digests known to be present in the cache.
    self._fetched = cache.cached_set()

  def add(self, digest, size=UNKNOWN_FILE_SIZE, priority=WorkerPool.MED):
    """Starts asynchronous fetch of item |digest|."""
    # A fetch for this digest is already in flight: nothing to do.
    if digest in self._pending:
      return

    # Record the access so verify_all_cached can later confirm the item
    # is still present in the cache.
    self._accessed.add(digest)

    # Already in the cache? Refresh its LRU position instead of downloading.
    if digest in self._fetched:
      # 'touch' returns True if the item is in cache and not corrupted.
      if self.cache.touch(digest, size):
        return
      # Corrupted entry: drop it and fall through to a fresh download.
      self._fetched.remove(digest)
      self.cache.evict(digest)

    # TODO(maruel): It should look at the free disk space, the current cache
    # size and the size of the new item on every new item:
    # - Trim the cache as more entries are listed when free disk space is low,
    #   otherwise if the amount of data downloaded during the run > free disk
    #   space, it'll crash.
    # - Make sure there's enough free disk space to fit all dependencies of
    #   this run! If not, abort early.

    # Kick off the actual download; completion arrives on self._channel.
    self._pending.add(digest)
    self.storage.async_fetch(
        self._channel, priority, digest, size,
        functools.partial(self.cache.write, digest))

  def wait(self, digests):
    """Starts a loop that waits for at least one of |digests| to be retrieved.

    Returns the first digest retrieved.
    """
    # Maybe one of the requested digests is already done.
    for candidate in digests:
      if candidate in self._fetched:
        return candidate

    # Every requested digest must have been scheduled via 'add' already.
    assert all(digest in self._pending for digest in digests), (
        digests, self._pending)

    # Pull completions until one of the requested digests shows up.
    while self._pending:
      finished = self._channel.pull()
      self._pending.remove(finished)
      self._fetched.add(finished)
      if finished in digests:
        return finished

    # Unreachable: the assert above guarantees a requested digest is pending.
    raise RuntimeError('Impossible state')

  def inject_local_file(self, path, algo):
    """Adds local file to the cache as if it was fetched from storage."""
    with open(path, 'rb') as src:
      blob = src.read()
    digest = algo(blob).hexdigest()
    self.cache.write(digest, [blob])
    self._fetched.add(digest)
    return digest

  @property
  def pending_count(self):
    """Returns number of items to be fetched."""
    return len(self._pending)

  def verify_all_cached(self):
    """True if all accessed items are in cache."""
    return self._accessed <= self.cache.cached_set()
804
805
class FetchStreamVerifier(object):
  """Verifies that fetched file is valid before passing it to the LocalCache."""

  def __init__(self, stream, expected_size):
    # Iterable of chunks to validate and forward.
    self.stream = stream
    # Expected total byte count, or UNKNOWN_FILE_SIZE to skip the size check.
    self.expected_size = expected_size
    # Bytes seen so far.
    self.current_size = 0

  def run(self):
    """Generator that yields same items as |stream|.

    Verifies |stream| is complete before yielding a last chunk to consumer.

    Also wraps IOError produced by consumer into MappingError exceptions since
    otherwise Storage will retry fetch on unrelated local cache errors.
    """
    # Buffer exactly one chunk, so that by the time a chunk is handed to the
    # consumer we already know whether it is the last one and can validate
    # total size first.
    held = None
    for incoming in self.stream:
      assert incoming is not None
      if held is not None:
        self._inspect_chunk(held, is_last=False)
        try:
          yield held
        except IOError as exc:
          raise MappingError('Failed to store an item in cache: %s' % exc)
      held = incoming
    # Flush the buffered final chunk (if the stream produced anything).
    if held is not None:
      self._inspect_chunk(held, is_last=True)
      try:
        yield held
      except IOError as exc:
        raise MappingError('Failed to store an item in cache: %s' % exc)

  def _inspect_chunk(self, chunk, is_last):
    """Called for each fetched chunk before passing it to consumer."""
    self.current_size += len(chunk)
    # Only the final chunk triggers the size check, and only when the
    # expected size is actually known.
    if not is_last or self.expected_size == UNKNOWN_FILE_SIZE:
      return
    if self.expected_size != self.current_size:
      raise IOError('Incorrect file size: expected %d, got %d' % (
          self.expected_size, self.current_size))
849
850
class StorageApi(object):
  """Interface for classes that implement low-level storage operations.

  Implementations deal only with raw digests and byte streams; compression
  and hashing schemes are layered on top by the higher level Storage class.

  Clients should generally not use StorageApi directly. Storage class is
  preferred since it implements compression and upload optimizations.
  """

  @property
  def location(self):
    """Location of a backing store that this class is using.

    Exact meaning depends on the implementation: for IsolateServer it is an
    URL of the isolate server, for FileSystem it is a file system path.
    """
    raise NotImplementedError()

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines the hashing scheme and compression method used.
    """
    raise NotImplementedError()

  def get_fetch_url(self, digest):
    """Returns an URL that can be used to fetch an item with given digest.

    Arguments:
      digest: hex digest of item to fetch.

    Returns:
      An URL, or None if the protocol doesn't support direct URLs.
    """
    raise NotImplementedError()

  def fetch(self, digest, offset=0):
    """Fetches an object and yields its content.

    Arguments:
      digest: hash digest of item to download.
      offset: offset (in bytes) from the start of the file to resume fetch at.

    Yields:
      Chunks of downloaded item (as str objects).
    """
    raise NotImplementedError()

  def push(self, item, push_state, content=None):
    """Uploads an |item| with content generated by |content| generator.

    |item| MUST go through 'contains' call to get |push_state| before it can
    be pushed to the storage.

    To be clear, here is one possible usage:
      all_items = [... all items to push as Item subclasses ...]
      for missing_item, push_state in storage_api.contains(all_items).items():
        storage_api.push(missing_item, push_state)

    When pushing to a namespace with compression, data that should be pushed
    and data provided by the item is not the same. In that case |content| is
    not None and it yields chunks of compressed data (using item.content() as
    a source of original uncompressed data). This is implemented by Storage
    class.

    Arguments:
      item: Item object that holds information about an item being pushed.
      push_state: push state object as returned by 'contains' call.
      content: a generator that yields chunks to push, item.content() if None.

    Returns:
      None.
    """
    raise NotImplementedError()

  def contains(self, items):
    """Checks for |items| on the server, prepares missing ones for upload.

    Arguments:
      items: list of Item objects to check for presence.

    Returns:
      A dict missing Item -> opaque push state object to be passed to 'push'.
      See doc string for 'push'.
    """
    raise NotImplementedError()
939
940
class _IsolateServerPushState(object):
  """Per-item state passed from IsolateServer.contains to IsolateServer.push.

  Note this needs to be a global class to support pickling.
  """

  def __init__(self, upload_url, finalize_url):
    # Progress flags flipped by IsolateServer.push as the two-step upload
    # (PUT content, then POST finalize) proceeds; they make 'push' retryable.
    self.uploaded = False
    self.finalized = False
    # URL to PUT the item's content to.
    self.upload_url = upload_url
    # URL to POST the finalization request to; a falsy value skips the
    # finalization step in IsolateServer.push.
    self.finalize_url = finalize_url
952
953
class IsolateServer(StorageApi):
  """StorageApi implementation that downloads and uploads to Isolate Server.

  It uploads and downloads directly from Google Storage whenever appropriate.
  Works only within single namespace.
  """

  def __init__(self, base_url, namespace):
    super(IsolateServer, self).__init__()
    assert base_url.startswith('http'), base_url
    # Normalized server URL (no trailing slash) and namespace this instance
    # is bound to.
    self._base_url = base_url.rstrip('/')
    self._namespace = namespace
    # Protects lazy initialization of |_server_caps| in _server_capabilities.
    self._lock = threading.Lock()
    # Cached /handshake response; None until the first handshake completes.
    self._server_caps = None

  @staticmethod
  def _generate_handshake_request():
    """Returns a dict to be sent as handshake request body."""
    # TODO(vadimsh): Set 'pusher' and 'fetcher' according to intended usage.
    return {
        'client_app_version': __version__,
        'fetcher': True,
        'protocol_version': ISOLATE_PROTOCOL_VERSION,
        'pusher': True,
    }

  @staticmethod
  def _validate_handshake_response(caps):
    """Validates and normalizes handshake response."""
    logging.info('Protocol version: %s', caps['protocol_version'])
    logging.info('Server version: %s', caps['server_app_version'])
    if caps.get('error'):
      raise MappingError(caps['error'])
    if not caps['access_token']:
      raise ValueError('access_token is missing')
    return caps

  @property
  def _server_capabilities(self):
    """Performs handshake with the server if not yet done.

    The handshake is done at most once; the response (including the access
    token used by 'contains') is cached in |_server_caps|.

    Returns:
      Server capabilities dictionary as returned by /handshake endpoint.

    Raises:
      MappingError if server rejects the handshake.
    """
    # TODO(maruel): Make this request much earlier asynchronously while the
    # files are being enumerated.

    # TODO(vadimsh): Put |namespace| in the URL so that server can apply
    # namespace-level ACLs to this call.
    with self._lock:
      if self._server_caps is None:
        try:
          caps = net.url_read_json(
              url=self._base_url + '/content-gs/handshake',
              data=self._generate_handshake_request())
          if caps is None:
            raise MappingError('Failed to perform handshake.')
          if not isinstance(caps, dict):
            raise ValueError('Expecting JSON dict')
          self._server_caps = self._validate_handshake_response(caps)
        except (ValueError, KeyError, TypeError) as exc:
          # KeyError exception has very confusing str conversion: it's just a
          # missing key value and nothing else. So print exception class name
          # as well.
          raise MappingError('Invalid handshake response (%s): %s' % (
              exc.__class__.__name__, exc))
      return self._server_caps

  @property
  def location(self):
    """See StorageApi.location: URL of the isolate server."""
    return self._base_url

  @property
  def namespace(self):
    """See StorageApi.namespace."""
    return self._namespace

  def get_fetch_url(self, digest):
    """See StorageApi.get_fetch_url: direct retrieve URL for |digest|."""
    assert isinstance(digest, basestring)
    return '%s/content-gs/retrieve/%s/%s' % (
        self._base_url, self._namespace, digest)

  def fetch(self, digest, offset=0):
    """See StorageApi.fetch.

    When |offset| is non-zero, sends a Range request and strictly validates
    the Content-Range response header before trusting the stream.
    """
    source_url = self.get_fetch_url(digest)
    logging.debug('download_file(%s, %d)', source_url, offset)

    connection = net.url_open(
        source_url,
        read_timeout=DOWNLOAD_READ_TIMEOUT,
        headers={'Range': 'bytes=%d-' % offset} if offset else None)

    if not connection:
      raise IOError('Request failed - %s' % source_url)

    # If |offset| is used, verify server respects it by checking Content-Range.
    if offset:
      content_range = connection.get_header('Content-Range')
      if not content_range:
        raise IOError('Missing Content-Range header')

      # 'Content-Range' format is 'bytes <offset>-<last_byte_index>/<size>'.
      # According to a spec, <size> can be '*' meaning "Total size of the file
      # is not known in advance".
      try:
        match = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', content_range)
        if not match:
          raise ValueError()
        content_offset = int(match.group(1))
        last_byte_index = int(match.group(2))
        size = None if match.group(3) == '*' else int(match.group(3))
      except ValueError:
        raise IOError('Invalid Content-Range header: %s' % content_range)

      # Ensure returned offset equals requested one.
      if offset != content_offset:
        raise IOError('Expecting offset %d, got %d (Content-Range is %s)' % (
            offset, content_offset, content_range))

      # Ensure entire tail of the file is returned.
      if size is not None and last_byte_index + 1 != size:
        raise IOError('Incomplete response. Content-Range: %s' % content_range)

    return stream_read(connection, NET_IO_FILE_CHUNK)

  def push(self, item, push_state, content=None):
    """See StorageApi.push.

    Two-step upload: PUT the content to |push_state.upload_url|, then
    (optionally) POST to |push_state.finalize_url|. |push_state| flags record
    progress so a retry after a failed finalization skips the re-upload.
    """
    assert isinstance(item, Item)
    assert item.digest is not None
    assert item.size is not None
    assert isinstance(push_state, _IsolateServerPushState)
    assert not push_state.finalized

    # Default to item.content().
    content = item.content() if content is None else content

    # Do not iterate byte by byte over 'str'. Push it all as a single chunk.
    if isinstance(content, basestring):
      assert not isinstance(content, unicode), 'Unicode string is not allowed'
      content = [content]

    # TODO(vadimsh): Do not read from |content| generator when retrying push.
    # If |content| is indeed a generator, it can not be re-winded back
    # to the beginning of the stream. A retry will find it exhausted. A possible
    # solution is to wrap |content| generator with some sort of caching
    # restartable generator. It should be done alongside streaming support
    # implementation.

    # This push operation may be a retry after failed finalization call below,
    # no need to reupload contents in that case.
    if not push_state.uploaded:
      # A cheezy way to avoid memcpy of (possibly huge) file, until streaming
      # upload support is implemented.
      if isinstance(content, list) and len(content) == 1:
        content = content[0]
      else:
        content = ''.join(content)
      # PUT file to |upload_url|.
      response = net.url_read(
          url=push_state.upload_url,
          data=content,
          content_type='application/octet-stream',
          method='PUT')
      if response is None:
        raise IOError('Failed to upload a file %s to %s' % (
            item.digest, push_state.upload_url))
      push_state.uploaded = True
    else:
      logging.info(
          'A file %s already uploaded, retrying finalization only', item.digest)

    # Optionally notify the server that it's done.
    if push_state.finalize_url:
      # TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
      # send it to isolated server. That way isolate server can verify that
      # the data safely reached Google Storage (GS provides MD5 and CRC32C of
      # stored files).
      # TODO(maruel): Fix the server to accept propery data={} so
      # url_read_json() can be used.
      response = net.url_read(
          url=push_state.finalize_url,
          data='',
          content_type='application/json',
          method='POST')
      if response is None:
        raise IOError('Failed to finalize an upload of %s' % item.digest)
      push_state.finalized = True

  def contains(self, items):
    """See StorageApi.contains.

    Sends a single /pre-upload batch query; the response is validated to be a
    list parallel to |items| (null entry == already present on the server).
    """
    logging.info('Checking existence of %d files...', len(items))

    # Ensure all items were initialized with 'prepare' call. Storage does that.
    assert all(i.digest is not None and i.size is not None for i in items)

    # Request body is a json encoded list of dicts.
    body = [
        {
          'h': item.digest,
          's': item.size,
          'i': int(item.high_priority),
        } for item in items
    ]

    query_url = '%s/content-gs/pre-upload/%s?token=%s' % (
        self._base_url,
        self._namespace,
        urllib.quote(self._server_capabilities['access_token']))

    # Response body is a list of push_urls (or null if file is already present).
    response = None
    try:
      response = net.url_read_json(url=query_url, data=body)
      if response is None:
        raise MappingError('Failed to execute /pre-upload query')
      if not isinstance(response, list):
        raise ValueError('Expecting response with json-encoded list')
      if len(response) != len(items):
        raise ValueError(
            'Incorrect number of items in the list, expected %d, '
            'but got %d' % (len(items), len(response)))
    except ValueError as err:
      raise MappingError(
          'Invalid response from server: %s, body is %s' % (err, response))

    # Pick Items that are missing, attach _PushState to them.
    missing_items = {}
    for i, push_urls in enumerate(response):
      if push_urls:
        assert len(push_urls) == 2, str(push_urls)
        missing_items[items[i]] = _IsolateServerPushState(
            push_urls[0], push_urls[1])
    logging.info('Queried %d files, %d cache hit',
        len(items), len(items) - len(missing_items))
    return missing_items
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001188
1189
class FileSystem(StorageApi):
  """StorageApi implementation that fetches data from the file system.

  The common use case is a NFS/CIFS file server that is mounted locally that is
  used to fetch the file on a local partition.
  """

  # Sentinel handed out by 'contains' and demanded back by 'push'; it forces
  # callers to go through 'contains' first (naively passing None to 'push'
  # will trip the assert there).
  _DUMMY_PUSH_STATE = object()

  def __init__(self, base_path, namespace):
    super(FileSystem, self).__init__()
    self._base_path = base_path
    self._namespace = namespace

  @property
  def location(self):
    """See StorageApi.location: root directory of the store."""
    return self._base_path

  @property
  def namespace(self):
    """See StorageApi.namespace."""
    return self._namespace

  def get_fetch_url(self, digest):
    """Plain file system has no URL scheme, so there is nothing to return."""
    return None

  def fetch(self, digest, offset=0):
    """See StorageApi.fetch: streams the file named |digest|."""
    assert isinstance(digest, basestring)
    path = os.path.join(self._base_path, digest)
    return file_read(path, offset=offset)

  def push(self, item, push_state, content=None):
    """See StorageApi.push: writes the item's content to a file."""
    assert isinstance(item, Item)
    assert item.digest is not None
    assert item.size is not None
    assert push_state is self._DUMMY_PUSH_STATE
    if content is None:
      content = item.content()
    if isinstance(content, basestring):
      assert not isinstance(content, unicode), 'Unicode string is not allowed'
      # Write the string as one chunk instead of byte by byte.
      content = [content]
    file_write(os.path.join(self._base_path, item.digest), content)

  def contains(self, items):
    """See StorageApi.contains: an item is present iff its file exists."""
    assert all(i.digest is not None and i.size is not None for i in items)
    missing = {}
    for item in items:
      if not os.path.exists(os.path.join(self._base_path, item.digest)):
        missing[item] = self._DUMMY_PUSH_STATE
    return missing
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001238
1239
class LocalCache(object):
  """Local cache that stores objects fetched via Storage.

  It can be accessed concurrently from multiple threads, so it should protect
  its internal state with some lock.
  """
  # Directory backing the cache, if the implementation uses one.
  cache_dir = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface."""
    return False

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Removes item from cache if it's there."""
    raise NotImplementedError()

  def read(self, digest):
    """Returns contents of the cached item as a single str."""
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache."""
    raise NotImplementedError()

  def hardlink(self, digest, dest, file_mode):
    """Ensures file at |dest| has same content as cached |digest|.

    If file_mode is provided, it is used to set the executable bit if
    applicable.
    """
    raise NotImplementedError()
1291
1292
1293class MemoryCache(LocalCache):
1294 """LocalCache implementation that stores everything in memory."""
1295
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001296 def __init__(self, file_mode_mask=0500):
1297 """Args:
1298 file_mode_mask: bit mask to AND file mode with. Default value will make
1299 all mapped files to be read only.
1300 """
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001301 super(MemoryCache, self).__init__()
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001302 self._file_mode_mask = file_mode_mask
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001303 # Let's not assume dict is thread safe.
1304 self._lock = threading.Lock()
1305 self._contents = {}
1306
1307 def cached_set(self):
1308 with self._lock:
1309 return set(self._contents)
1310
1311 def touch(self, digest, size):
1312 with self._lock:
1313 return digest in self._contents
1314
1315 def evict(self, digest):
1316 with self._lock:
1317 self._contents.pop(digest, None)
1318
1319 def read(self, digest):
1320 with self._lock:
1321 return self._contents[digest]
1322
1323 def write(self, digest, content):
1324 # Assemble whole stream before taking the lock.
1325 data = ''.join(content)
1326 with self._lock:
1327 self._contents[digest] = data
1328
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001329 def hardlink(self, digest, dest, file_mode):
1330 """Since data is kept in memory, there is no filenode to hardlink."""
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001331 file_write(dest, [self.read(digest)])
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001332 if file_mode is not None:
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001333 os.chmod(dest, file_mode & self._file_mode_mask)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001334
1335
def get_hash_algo(_namespace):
  """Returns the hash constructor to use when uploading to given |namespace|.

  Currently always SHA-1, regardless of the namespace.
  """
  # TODO(vadimsh): Implement this at some point.
  return hashlib.sha1
1340
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001341
def is_namespace_with_compression(namespace):
  """Returns True if given |namespace| stores compressed objects.

  Compression is signaled by a '-gzip' or '-deflate' namespace suffix.
  """
  return (namespace.endswith('-gzip') or
          namespace.endswith('-deflate'))
1345
1346
def get_storage_api(file_or_url, namespace):
  """Returns an object implementing the low-level StorageApi interface.

  Used by Storage to work with a single isolate |namespace|. Clients should
  rarely use it directly; 'get_storage' is the better alternative.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of
        isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of StorageApi subclass.
  """
  if not file_path.is_url(file_or_url):
    return FileSystem(file_or_url, namespace)
  return IsolateServer(file_or_url, namespace)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001368
1369
def get_storage(file_or_url, namespace):
  """Returns Storage class that can upload and download from |namespace|.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of
        isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of Storage.
  """
  storage_api = get_storage_api(file_or_url, namespace)
  return Storage(storage_api)
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001384
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001385
def expand_symlinks(indir, relfile):
  """Follows symlinks in |relfile|, but treating symlinks that point outside the
  build tree as if they were ordinary directories/files. Returns the final
  symlink-free target and a list of paths to symlinks encountered in the
  process.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory we could support
  that case.

  Arguments:
    indir: root directory of the tree; returned paths stay relative to it.
    relfile: path relative to |indir|; a trailing os.path.sep means it
        designates a directory.

  Returns:
    Tuple (relfile, symlinks) where |relfile| is the symlink-free path
    relative to |indir| (trailing separator preserved) and |symlinks| is the
    list of in-tree symlink paths (relative to |indir|) traversed on the way.

  Raises:
    MappingError: if a symlink target does not exist or a recursive symlink
        reference is detected.
  """
  # Remember if the input designated a directory so the trailing separator
  # can be restored on the result at the end.
  is_directory = relfile.endswith(os.path.sep)
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    # Locate the first symlink component of |todo| relative to |done|.
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
        done, todo)
    if not symlink:
      # No symlink remains in the path; fix the case and stop.
      todo = file_path.fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break
    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    target = os.path.normpath(os.path.join(done, pre_symlink))
    symlink_target = os.readlink(symlink_path)
    if os.path.isabs(symlink_target):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = file_path.fix_native_path_case(target, symlink_target)

    if not os.path.exists(target):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not file_path.path_starts_with(indir, target):
      # The symlink points outside the build tree; treat it as an ordinary
      # directory/file and keep walking through it.
      done = symlink_path
      todo = post_symlink
      continue
    if file_path.path_starts_with(target, symlink_path):
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))
    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    target = target.split(os.path.sep)
    symlink_path = symlink_path.split(os.path.sep)
    prefix_length = 0
    for target_piece, symlink_path_piece in zip(target, symlink_path):
      if target_piece == symlink_path_piece:
        prefix_length += 1
      else:
        break
    done = os.path.sep.join(target[:prefix_length])
    todo = os.path.join(
        os.path.sep.join(target[prefix_length:]), post_symlink)

  relfile = os.path.relpath(done, indir)
  # Restore the trailing separator if the input designated a directory.
  relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
  return relfile, symlinks
1456
1457
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: root directory all outputs are relative to.
    relfile: path relative to |indir|; a trailing os.path.sep means it is a
        directory to expand recursively.
    blacklist: optional callable taking a relative path and returning True
        for entries to skip while recursing into directories. Files passed in
        directly are returned even if blacklisted.
    follow_symlinks: if True, resolves in-tree symlinks via expand_symlinks()
        and includes them in the output.

  Returns:
    List of paths relative to |indir|, including any traversed symlinks.

  Raises:
    MappingError: on absolute, out-of-tree, missing or miscased paths.
  """
  if os.path.isabs(relfile):
    raise MappingError('Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + '.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      #    u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      #  ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree. We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    relfile, symlinks = expand_symlinks(indir, relfile)

  if relfile.endswith(os.path.sep):
    if not os.path.isdir(infile):
      raise MappingError(
          '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith('.' + os.path.sep):
      relfile = relfile[2:]
    outfiles = symlinks
    try:
      # Recurse into the directory; blacklisted children are skipped, and
      # child directories get a trailing separator so the recursion expands
      # them too.
      for filename in os.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if os.path.isdir(os.path.join(indir, inner_relfile)):
          inner_relfile += os.path.sep
        outfiles.extend(
            expand_directory_and_symlink(indir, inner_relfile, blacklist,
                                         follow_symlinks))
      return outfiles
    except OSError as e:
      raise MappingError(
          'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise MappingError(
          'Input directory %s must have a trailing slash' % infile)

    if not os.path.isfile(infile):
      raise MappingError('Input file %s doesn\'t exist' % infile)

    return symlinks + [relfile]
1536
1537
def process_input(filepath, prevdict, read_only, algo):
  """Processes an input file, a dependency, and return meta data about it.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calculate the SHA-1 of the file's
    content if the path points to a file and not a symlink.

  Arguments:
    filepath: File to act on.
    prevdict: the previous dictionary. It is used to retrieve the cached sha-1
              to skip recalculating the hash. Optional.
    read_only: If 1 or 2, the file mode is manipulated. In practice, only save
               one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
               windows, mode is not set since all files are 'executable' by
               default.
    algo: Hashing algorithm used.

  Returns:
    The necessary data to create an entry in the 'files' section of an
    .isolated file. Keys used: 'm' (mode, non-Windows only), 't' (mtime),
    's' (size) and 'h' (hash) for regular files; 'l' (destination) for
    symlinks.

  Raises:
    MappingError: if |filepath| is missing.
  """
  out = {}
  # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
  # if prevdict.get('T') == True:
  #   # The file's content is ignored. Skip the time and hard code mode.
  #   out['s'] = 0
  #   out['h'] = algo().hexdigest()
  #   out['T'] = True
  #   return out

  # Always check the file stat and check if it is a link. The timestamp is used
  # to know if the file's content/symlink destination should be looked into.
  # E.g. only reuse from prevdict if the timestamp hasn't changed.
  # There is the risk of the file's timestamp being reset to its last value
  # manually while its content changed. We don't protect against that use case.
  try:
    # lstat() so symlinks themselves are stat'ed, not their destination.
    filestats = os.lstat(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(filestats.st_mode)

  if sys.platform != 'win32':
    # Ignore file mode on Windows since it's not really useful there.
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      filemode &= ~stat.S_IWUSR
    # Keep the group execute bit in sync with the user execute bit.
    if filemode & stat.S_IXUSR:
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    if not is_link:
      out['m'] = filemode

  # Used to skip recalculating the hash or link destination. Use the most recent
  # update time.
  # TODO(maruel): Save it in the .state file instead of .isolated so the
  # .isolated file is deterministic.
  out['t'] = int(round(filestats.st_mtime))

  if not is_link:
    out['s'] = filestats.st_size
    # If the timestamp wasn't updated and the file size is still the same, carry
    # on the sha-1.
    if (prevdict.get('t') == out['t'] and
        prevdict.get('s') == out['s']):
      # Reuse the previous hash if available.
      out['h'] = prevdict.get('h')
    if not out.get('h'):
      out['h'] = hash_file(filepath, algo)
  else:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      # Reuse the previous link destination if available.
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do again
      # here.
      symlink_value = os.readlink(filepath)  # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = file_path.fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  return out
1627
1628
def save_isolated(isolated, data):
  """Writes one or multiple .isolated files.

  Note: this reference implementation does not create child .isolated file so
  it always returns an empty list.

  Returns the list of child isolated files that are included by |isolated|.
  """
  # Round-trip |data| through the parser to make sure it is valid .isolated
  # data before writing it out.
  load_isolated(json.dumps(data), SUPPORTED_ALGOS[data['algo']])
  tools.write_json(isolated, data, True)
  return []
1642
1643
def upload_tree(base_url, indir, infiles, namespace):
  """Uploads the given tree to the given url.

  Arguments:
    base_url: The base url, it is assume that |base_url|/has/ can be used to
              query if an element was already uploaded, and |base_url|/store/
              can be used to upload a new element.
    indir: Root directory the infiles are based in.
    infiles: dict of files to upload from |indir| to |base_url|.
    namespace: The namespace to use on the server.
  """
  logging.info('upload_tree(indir=%s, files=%d)', indir, len(infiles))

  # Convert |indir| + |infiles| into FileItem objects. Symlinks are skipped
  # since they are not represented by items on the isolate server side.
  items = []
  for filepath, metadata in infiles.iteritems():
    if 'l' in metadata:
      continue
    items.append(
        FileItem(
            path=os.path.join(indir, filepath),
            digest=metadata['h'],
            size=metadata['s'],
            high_priority=metadata.get('priority') == '0'))

  with get_storage(base_url, namespace) as storage:
    storage.upload_items(items)
  return 0
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001673
1674
def load_isolated(content, algo):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server. If None, falls back to the
          'algo' key embedded in the data, defaulting to 'sha-1'.

  Returns:
    The validated .isolated data as a dict, with path separators in the
    'files' keys and 'l' values normalized to os.path.sep.

  Raises:
    ConfigError: if the content is not valid .isolated data.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  value = data.get('version', '1.0')
  if not isinstance(value, basestring):
    raise ConfigError('Expected string, got %r' % value)
  try:
    version = tuple(map(int, value.split('.')))
  except ValueError:
    raise ConfigError('Expected valid version, got %r' % value)

  expected_version = tuple(map(int, ISOLATED_FILE_VERSION.split('.')))
  # Major version must match.
  if version[0] != expected_version[0]:
    raise ConfigError(
        'Expected compatible \'%s\' version, got %r' %
        (ISOLATED_FILE_VERSION, value))

  if algo is None:
    # TODO(maruel): Remove the default around Jan 2014.
    # Default the algorithm used in the .isolated file itself, falls back to
    # 'sha-1' if unspecified.
    algo = SUPPORTED_ALGOS_REVERSE[data.get('algo', 'sha-1')]

  for key, value in data.iteritems():
    if key == 'algo':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise ConfigError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != SUPPORTED_ALGOS_REVERSE[algo]:
        raise ConfigError(
            'Expected \'%s\', got %r' % (SUPPORTED_ALGOS_REVERSE[algo], value))

    elif key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_valid_hash(subsubvalue, algo):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, (int, long)):
              raise ConfigError('Expected int or long, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        # Cross-key invariants: exactly one of 'h' (file) or 'l' (symlink),
        # 's' present iff 'h' is, and no mode on symlinks.
        if bool('h' in subvalue) == bool('l' in subvalue):
          raise ConfigError(
              'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
              subvalue)
        if bool('h' in subvalue) != bool('s' in subvalue):
          raise ConfigError(
              'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
              subvalue)
        if bool('s' in subvalue) == bool('l' in subvalue):
          raise ConfigError(
              'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
              subvalue)
        if 'l' in subvalue and 'm' in subvalue:
          raise ConfigError(
              'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_valid_hash(subvalue, algo):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'os':
      if version >= (1, 4):
        raise ConfigError('Key \'os\' is not allowed starting version 1.4')

    elif key == 'read_only':
      if value not in (0, 1, 2):
        raise ConfigError('Expected 0, 1 or 2, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise ConfigError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are
  # always in the the native path format, someone could want to download an
  # .isolated tree from another OS.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].iteritems())
    for v in data['files'].itervalues():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data
1818
1819
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""
  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo
    # Becomes True once the whole left-side of the tree is parsed. 'Tree' here
    # means the .isolate and all the .isolated files recursively included by
    # it with the 'includes' key. The order of each sha-1 in 'includes', each
    # representing a .isolated file in the hash table, is important, as the
    # later ones are not processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Raw data.
    self.data = {}
    # One IsolatedFile instance per entry of self.data['includes'].
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, self.algo)
    included = self.data.get('includes', [])
    self.children = [IsolatedFile(digest, self.algo) for digest in included]
    self._is_parsed = True

  def fetch_files(self, fetch_queue, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if not self._is_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        fetch_queue.add(properties['h'], properties['s'], WorkerPool.MED)
    self.files_fetched = True
1877
1878
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command to run, as a list of arguments, if any.
    self.command = []
    # Maps relative file path -> properties dict as found in 'files' sections.
    self.files = {}
    # Value of the 'read_only' key, or None if unspecified.
    self.read_only = None
    # Directory to run the command from, relative to the mapped root.
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, fetch_queue, root_isolated_hash, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.
    """
    self.root = IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      # Enqueues |isolated_file| for fetching, rejecting recursive includes.
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      fetch_queue.add(h, priority=WorkerPool.HIGH)

    retrieve(self.root)

    while pending:
      # Process whichever .isolated file finished fetching first.
      item_hash = fetch_queue.wait(pending)
      item = pending.pop(item_hash)
      item.load(fetch_queue.cache.read(item_hash))
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(fetch_queue, self.root)

    def check(n):
      # Sanity check: every node in the tree must have its files fetched.
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''

  def _traverse_tree(self, fetch_queue, node):
    """Fetches files of fetchable nodes and unlocks at most one new child.

    Only the first non-fetchable child per node is marked fetchable on each
    pass, which enforces the strict left-to-right 'includes' ordering.
    """
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(fetch_queue, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(fetch_queue, i)

  def _update_self(self, fetch_queue, node):
    """Merges |node|'s files and properties into self; earliest node wins."""
    node.fetch_files(fetch_queue, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      # Ensure paths are correctly separated on windows.
      self.command = node.data['command']
      if self.command:
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1973
1974
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001975def fetch_isolated(isolated_hash, storage, cache, outdir, require_command):
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001976 """Aggressively downloads the .isolated file(s), then download all the files.
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001977
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001978 Arguments:
1979 isolated_hash: hash of the root *.isolated file.
1980 storage: Storage class that communicates with isolate storage.
1981 cache: LocalCache class that knows how to store and map files locally.
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001982 outdir: Output directory to map file tree to.
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001983 require_command: Ensure *.isolated specifies a command to run.
1984
1985 Returns:
1986 Settings object that holds details about loaded *.isolated file.
1987 """
Marc-Antoine Ruel4e8cd182014-06-18 13:27:17 -04001988 logging.debug(
1989 'fetch_isolated(%s, %s, %s, %s, %s)',
1990 isolated_hash, storage, cache, outdir, require_command)
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001991 # Hash algorithm to use, defined by namespace |storage| is using.
1992 algo = storage.hash_algo
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001993 with cache:
1994 fetch_queue = FetchQueue(storage, cache)
1995 settings = Settings()
1996
1997 with tools.Profiler('GetIsolateds'):
1998 # Optionally support local files by manually adding them to cache.
1999 if not is_valid_hash(isolated_hash, algo):
Marc-Antoine Ruel4e8cd182014-06-18 13:27:17 -04002000 logging.debug('%s is not a valid hash, assuming a file', isolated_hash)
2001 try:
2002 isolated_hash = fetch_queue.inject_local_file(isolated_hash, algo)
2003 except IOError:
2004 raise MappingError(
2005 '%s doesn\'t seem to be a valid file. Did you intent to pass a '
2006 'valid hash?' % isolated_hash)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002007
2008 # Load all *.isolated and start loading rest of the files.
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04002009 settings.load(fetch_queue, isolated_hash, algo)
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002010 if require_command and not settings.command:
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002011 # TODO(vadimsh): All fetch operations are already enqueue and there's no
2012 # easy way to cancel them.
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002013 raise ConfigError('No command to run')
2014
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002015 with tools.Profiler('GetRest'):
2016 # Create file system hierarchy.
2017 if not os.path.isdir(outdir):
2018 os.makedirs(outdir)
2019 create_directories(outdir, settings.files)
Marc-Antoine Ruelccafe0e2013-11-08 16:15:36 -05002020 create_symlinks(outdir, settings.files.iteritems())
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002021
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002022 # Ensure working directory exists.
2023 cwd = os.path.normpath(os.path.join(outdir, settings.relative_cwd))
2024 if not os.path.isdir(cwd):
2025 os.makedirs(cwd)
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002026
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002027 # Multimap: digest -> list of pairs (path, props).
2028 remaining = {}
2029 for filepath, props in settings.files.iteritems():
2030 if 'h' in props:
2031 remaining.setdefault(props['h'], []).append((filepath, props))
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002032
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002033 # Now block on the remaining files to be downloaded and mapped.
2034 logging.info('Retrieving remaining files (%d of them)...',
2035 fetch_queue.pending_count)
2036 last_update = time.time()
2037 with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
2038 while remaining:
2039 detector.ping()
2040
2041 # Wait for any item to finish fetching to cache.
2042 digest = fetch_queue.wait(remaining)
2043
2044 # Link corresponding files to a fetched item in cache.
2045 for filepath, props in remaining.pop(digest):
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05002046 cache.hardlink(
2047 digest, os.path.join(outdir, filepath), props.get('m'))
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002048
2049 # Report progress.
2050 duration = time.time() - last_update
2051 if duration > DELAY_BETWEEN_UPDATES_IN_SECS:
2052 msg = '%d files remaining...' % len(remaining)
2053 print msg
2054 logging.info(msg)
2055 last_update = time.time()
2056
2057 # Cache could evict some items we just tried to fetch, it's a fatal error.
2058 if not fetch_queue.verify_all_cached():
2059 raise MappingError('Cache is too small to hold all requested files')
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002060 return settings
2061
2062
def directory_to_metadata(root, algo, blacklist):
  """Returns the FileItem list and .isolated metadata for a directory."""
  root = file_path.get_native_path_case(root)
  # Symlinks are expanded everywhere but on Windows.
  follow_symlinks = sys.platform != 'win32'
  relpaths = expand_directory_and_symlink(
      root, '.' + os.path.sep, blacklist, follow_symlinks)
  metadata = {}
  for relpath in relpaths:
    meta = process_input(os.path.join(root, relpath), {}, False, algo)
    # The timestamp is not part of the metadata sent to the server.
    meta.pop('t')
    metadata[relpath] = meta
  items = []
  for relpath, meta in metadata.iteritems():
    # Entries without a hash (e.g. symlinks) are not uploaded as content.
    if 'h' not in meta:
      continue
    items.append(
        FileItem(
            path=os.path.join(root, relpath),
            digest=meta['h'],
            size=meta['s'],
            high_priority=relpath.endswith('.isolated')))
  return items, metadata
2083
2084
def archive_files_to_storage(storage, files, blacklist):
  """Stores every entry and returns the relevant data.

  Arguments:
    storage: a Storage object that communicates with the remote object store.
    files: list of file paths to upload. If a directory is specified, a
        .isolated file is created and its hash is returned.
    blacklist: function that returns True if a file should be omitted.

  Returns:
    List of (hash, path) tuples, one per entry in |files|, in the same order.

  Raises:
    Error: on duplicate entries, on a path that is neither a file nor a
        directory, or when a file can't be processed.
  """
  assert all(isinstance(i, unicode) for i in files), files
  if len(files) != len(set(map(os.path.abspath, files))):
    raise Error('Duplicate entries found.')

  results = []
  # The temporary directory is only created as needed.
  tempdir = None
  try:
    # TODO(maruel): Yield the files to a worker thread.
    items_to_upload = []
    for f in files:
      try:
        filepath = os.path.abspath(f)
        if os.path.isdir(filepath):
          # Uploading a whole directory: hash its content, then generate a
          # .isolated file referencing every entry and upload that too.
          items, metadata = directory_to_metadata(
              filepath, storage.hash_algo, blacklist)

          # Create the .isolated file.
          if not tempdir:
            tempdir = tempfile.mkdtemp(prefix='isolateserver')
          handle, isolated = tempfile.mkstemp(dir=tempdir, suffix='.isolated')
          os.close(handle)
          data = {
              'algo': SUPPORTED_ALGOS_REVERSE[storage.hash_algo],
              'files': metadata,
              'version': ISOLATED_FILE_VERSION,
          }
          save_isolated(isolated, data)
          h = hash_file(isolated, storage.hash_algo)
          items_to_upload.extend(items)
          items_to_upload.append(
              FileItem(
                  path=isolated,
                  digest=h,
                  size=os.stat(isolated).st_size,
                  high_priority=True))
          # The directory is reported through its generated .isolated hash.
          results.append((h, f))

        elif os.path.isfile(filepath):
          h = hash_file(filepath, storage.hash_algo)
          items_to_upload.append(
              FileItem(
                  path=filepath,
                  digest=h,
                  size=os.stat(filepath).st_size,
                  high_priority=f.endswith('.isolated')))
          results.append((h, f))
        else:
          raise Error('%s is neither a file nor a directory.' % f)
      except OSError as e:
        # Keep the OS level detail, it helps diagnosing the failure.
        raise Error('Failed to process %s: %s.' % (f, e))
    # Technically we would care about which files were uploaded but we don't
    # much in practice.
    _uploaded_files = storage.upload_items(items_to_upload)
    return results
  finally:
    if tempdir:
      shutil.rmtree(tempdir)
2153
2154
def archive(out, namespace, files, blacklist):
  """Uploads the given entries and prints '<hash> <path>' for each of them."""
  if files == ['-']:
    files = sys.stdin.readlines()

  if not files:
    raise Error('Nothing to upload')

  file_list = [f.decode('utf-8') for f in files]
  compiled_blacklist = tools.gen_blacklist(blacklist)
  with get_storage(out, namespace) as storage:
    entries = archive_files_to_storage(storage, file_list, compiled_blacklist)
    print('\n'.join('%s %s' % entry for entry in entries))
2167
2168
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  add_isolate_server_options(parser, False)
  parser.add_option(
      '--blacklist',
      action='append', default=list(DEFAULT_BLACKLIST),
      help='List of regexp to use as blacklist filter when uploading '
           'directories')
  options, files = parser.parse_args(args)
  process_isolate_server_options(parser, options)
  server = options.isolate_server
  # Only authenticate when actually talking to a server over the network.
  if file_path.is_url(server):
    auth.ensure_logged_in(server)
  try:
    archive(server, options.namespace, files, options.blacklist)
  except Error as e:
    parser.error(e.args[0])
  return 0
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002196
2197
def CMDdownload(parser, args):
  """Download data from the server.

  It can either download individual files or a complete tree from a .isolated
  file.
  """
  add_isolate_server_options(parser, True)
  parser.add_option(
      '-i', '--isolated', metavar='HASH',
      help='hash of an isolated file, .isolated file content is discarded, use '
           '--file if you need it')
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default=os.getcwd(),
      help='destination directory')
  options, args = parser.parse_args(args)
  process_isolate_server_options(parser, options)
  if args:
    parser.error('Unsupported arguments: %s' % args)
  # Exactly one of the two modes must be selected.
  if bool(options.isolated) == bool(options.file):
    parser.error('Use one of --isolated or --file, and only one.')

  options.target = os.path.abspath(options.target)

  remote = options.isolate_server or options.indir
  # Only authenticate when actually talking to a server over the network.
  if file_path.is_url(remote):
    auth.ensure_logged_in(remote)

  with get_storage(remote, options.namespace) as storage:
    # Fetching individual files.
    if options.file:
      channel = threading_utils.TaskChannel()
      pending = {}
      for digest, dest in options.file:
        pending[digest] = dest
        storage.async_fetch(
            channel,
            WorkerPool.MED,
            digest,
            UNKNOWN_FILE_SIZE,
            functools.partial(file_write, os.path.join(options.target, dest)))
      while pending:
        fetched = channel.pull()
        dest = pending.pop(fetched)
        logging.info('%s: %s', fetched, dest)

    # Fetching whole isolated tree.
    if options.isolated:
      settings = fetch_isolated(
          isolated_hash=options.isolated,
          storage=storage,
          cache=MemoryCache(),
          outdir=options.target,
          require_command=False)
      # |rel| is already absolute; the previous code joined options.target onto
      # it a second time, which was only a no-op because joining onto an
      # absolute path discards the first component.
      rel = os.path.join(options.target, settings.relative_cwd)
      print('To run this test please run from the directory %s:' % rel)
      print(' ' + ' '.join(settings.command))

  return 0
2260
2261
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDhashtable(parser, args):
  """Archives data to a hashtable on the file system.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  add_outdir_options(parser)
  blacklist_help = ('List of regexp to use as blacklist filter when uploading '
                    'directories')
  parser.add_option(
      '--blacklist',
      action='append', default=list(DEFAULT_BLACKLIST), help=blacklist_help)
  options, files = parser.parse_args(args)
  process_outdir_options(parser, options, os.getcwd())
  try:
    # The 'default' namespace is used so files are not compressed when
    # archiving to the file system.
    archive(options.outdir, 'default', files, options.blacklist)
  except Error as e:
    parser.error(e.args[0])
  return 0
2288
2289
def add_isolate_server_options(parser, add_indir):
  """Registers --isolate-server and --namespace on |parser|.

  When |add_indir| is True, also registers --indir as a file system based
  alternative to an isolate server.
  """
  parser.add_option(
      '-I', '--isolate-server',
      metavar='URL', default=os.environ.get('ISOLATE_SERVER', ''),
      help='URL of the Isolate Server to use. Defaults to the environment '
           'variable ISOLATE_SERVER if set. No need to specify https://, this '
           'is assumed.')
  parser.add_option(
      '--namespace', default='default-gzip',
      help='The namespace to use on the Isolate Server, default: %default')
  if not add_indir:
    return
  parser.add_option(
      '--indir', metavar='DIR',
      help='Directory used to store the hashtable instead of using an '
          'isolate server.')
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002309
2310
def process_isolate_server_options(parser, options):
  """Processes the --isolate-server and --indir options and aborts if neither is
  specified.

  Mutates options in place: options.isolate_server is rewritten as a
  normalized URL, or options.indir as an absolute native path.
  """
  # --indir only exists on the parser when add_isolate_server_options() was
  # called with add_indir=True, hence the hasattr() check.
  has_indir = hasattr(options, 'indir')
  if not options.isolate_server:
    if not has_indir:
      parser.error('--isolate-server is required.')
    elif not options.indir:
      parser.error('Use one of --indir or --isolate-server.')
  else:
    if has_indir and options.indir:
      parser.error('Use only one of --indir or --isolate-server.')

  if options.isolate_server:
    # 'https' is used as the default scheme when the URL doesn't specify one.
    parts = urlparse.urlparse(options.isolate_server, 'https')
    if parts.query:
      parser.error('--isolate-server doesn\'t support query parameter.')
    if parts.fragment:
      parser.error('--isolate-server doesn\'t support fragment in the url.')
    # urlparse('foo.com') will result in netloc='', path='foo.com', which is not
    # what is desired here.
    new = list(parts)
    if not new[1] and new[2]:
      # Move the host from the path component (index 2) to the netloc
      # component (index 1).
      new[1] = new[2].rstrip('/')
      new[2] = ''
    # Strip any trailing slash from the path.
    new[2] = new[2].rstrip('/')
    options.isolate_server = urlparse.urlunparse(new)
    on_error.report_on_exception_exit(options.isolate_server)
    return

  # --indir mode: normalize to an absolute native path and require it to exist.
  if file_path.is_url(options.indir):
    parser.error('Can\'t use an URL for --indir.')
  options.indir = unicode(options.indir).replace('/', os.path.sep)
  options.indir = os.path.abspath(
      os.path.normpath(os.path.join(os.getcwd(), options.indir)))
  if not os.path.isdir(options.indir):
    parser.error('Path given to --indir must exist.')
2349
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002350
2351
def add_outdir_options(parser):
  """Registers --outdir on |parser|; it is orthogonal to --isolate-server.

  Note: On upload, separate commands are used between 'archive' and 'hashtable'.
  On 'download', the same command can download from either an isolate server or
  a file system.
  """
  parser.add_option(
      '-o', '--outdir', metavar='DIR',
      help='Directory used to recreate the tree.')
2362
2363
def process_outdir_options(parser, options, cwd):
  """Validates --outdir and rewrites it in place as an absolute native path.

  Aborts via parser.error() when the value is missing or is an URL.
  """
  outdir = options.outdir
  if not outdir:
    parser.error('--outdir is required.')
  if file_path.is_url(outdir):
    parser.error('Can\'t use an URL for --outdir.')
  # Switch to the platform's separator; outdir doesn't need native path case
  # since tracing is never done from there.
  outdir = unicode(outdir).replace('/', os.path.sep)
  options.outdir = os.path.abspath(os.path.normpath(os.path.join(cwd, outdir)))
  # In theory, we'd create the directory outdir right away. Defer doing it in
  # case there's errors in the command line.
2375
2376
class OptionParserIsolateServer(tools.OptionParserWithLogging):
  """Option parser shared by all commands; registers the auth options on top of
  the logging ones provided by the base class.
  """

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self,
        version=__version__,
        prog=os.path.basename(sys.modules[__name__].__file__),
        **kwargs)
    # Registers the authentication related options on this parser.
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    # Let the base class consume its options first, then handle the
    # authentication ones added in __init__.
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    auth.process_auth_options(self, options)
    return options, args
2391
2392
def main(args):
  """Dispatches |args| to the matching CMD* handler in this module."""
  return subcommand.CommandDispatcher(__name__).execute(
      OptionParserIsolateServer(), args)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00002396
2397
if __name__ == '__main__':
  # Fix the I/O streams encoding before anything is printed.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  # Enables ANSI color escape sequence handling (notably on Windows consoles).
  colorama.init()
  sys.exit(main(sys.argv[1:]))