blob: f41a42c5d1811b699c8b60201ab2d69bd15b8a82 [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00005
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05006"""Archives a set of files or directories to a server."""
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00007
Marc-Antoine Ruel582e2242014-06-26 15:22:06 -04008__version__ = '0.3.3'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +000010import functools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000011import hashlib
maruel@chromium.org41601642013-09-18 19:40:46 +000012import json
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000013import logging
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000014import os
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000015import re
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050016import shutil
17import stat
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000018import sys
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050019import tempfile
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000020import threading
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000021import time
maruel@chromium.orge82112e2013-04-24 14:41:55 +000022import urllib
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -050023import urlparse
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000024import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000025
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000026from third_party import colorama
27from third_party.depot_tools import fix_encoding
28from third_party.depot_tools import subcommand
29
Marc-Antoine Ruel37989932013-11-19 16:28:08 -050030from utils import file_path
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000031from utils import net
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000032from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000033from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000034
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080035import auth
36
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000037
# Version of isolate protocol passed to the server in /handshake request.
ISOLATE_PROTOCOL_VERSION = '1.0'
# Version stored and expected in .isolated files.
ISOLATED_FILE_VERSION = '1.4'


# The number of files to check the isolate server per /pre-upload query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: larger the file -> larger the
# possibility it has changed). Then the first ITEMS_PER_CONTAINS_QUERIES[0]
# files are taken and sent to '/pre-upload', then the next
# ITEMS_PER_CONTAINS_QUERIES[1], and so on. The numbers here are a trade-off;
# the more per request, the lower the effect of HTTP round trip latency and
# TCP-level chattiness. On the other hand, larger values cause longer lookups,
# increasing the initial latency to start uploading, which is especially an
# issue for large files. This value is optimized for the "few thousands files
# to look up with minimal number of large files missing" case.
ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]


# A list of already compressed extension types that should not receive any
# compression before being uploaded.
ALREADY_COMPRESSED_TYPES = [
    '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'pdf', 'png',
    'wav', 'zip'
]


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# Chunk size to use when doing disk I/O.
DISK_FILE_CHUNK = 1024 * 1024

# Chunk size to use when reading from network stream.
NET_IO_FILE_CHUNK = 16 * 1024


# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout whole download will be aborted.
DOWNLOAD_READ_TIMEOUT = 60

# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30


# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
# specify the names here. Maps the wire-format algorithm name to the hashlib
# constructor.
SUPPORTED_ALGOS = {
  'md5': hashlib.md5,
  'sha-1': hashlib.sha1,
  'sha-512': hashlib.sha512,
}


# Used for serialization: maps the hashlib constructor back to its wire name.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.iteritems())


# Regexps of files never worth archiving.
DEFAULT_BLACKLIST = (
  # Temporary vim or python files.
  r'^.+\.(?:pyc|swp)$',
  # .git or .svn directory.
  r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
)


# Chromium-specific.
DEFAULT_BLACKLIST += (
  r'^.+\.(?:run_test_cases)$',
  r'^(?:.+' + re.escape(os.path.sep) + r'|)testserver\.log$',
)
121
class Error(Exception):
  """Base class for generic runtime errors raised by this module."""
125
126
class ConfigError(ValueError):
  """Raised when a .isolated file fails to load or is malformed."""
130
131
class MappingError(OSError):
  """Raised when the file tree cannot be recreated on disk."""
135
136
def is_valid_hash(value, algo):
  """Checks whether |value| looks like a hex digest produced by |algo|.

  A valid digest is exactly digest_size*2 hexadecimal characters (upper or
  lower case).
  """
  expected_len = algo().digest_size * 2
  pattern = r'^[a-fA-F0-9]{%d}$' % expected_len
  return bool(re.match(pattern, value))
141
142
def hash_file(filepath, algo):
  """Calculates the hash of a file without reading it all in memory at once.

  |algo| should be one of hashlib hashing algorithm.
  """
  hasher = algo()
  with open(filepath, 'rb') as f:
    chunk = f.read(DISK_FILE_CHUNK)
    while chunk:
      hasher.update(chunk)
      chunk = f.read(DISK_FILE_CHUNK)
  return hasher.hexdigest()
156
157
def stream_read(stream, chunk_size):
  """Yields successive |chunk_size| reads from |stream| until exhausted."""
  data = stream.read(chunk_size)
  while data:
    yield data
    data = stream.read(chunk_size)
165
166
def file_read(filepath, chunk_size=DISK_FILE_CHUNK, offset=0):
  """Yields file content in chunks of |chunk_size| starting from |offset|."""
  with open(filepath, 'rb') as f:
    if offset:
      f.seek(offset)
    data = f.read(chunk_size)
    while data:
      yield data
      data = f.read(chunk_size)
177
178
def file_write(filepath, content_generator):
  """Writes the chunks produced by |content_generator| to |filepath|.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  parent = os.path.dirname(filepath)
  if not os.path.isdir(parent):
    os.makedirs(parent)
  written = 0
  with open(filepath, 'wb') as out:
    for chunk in content_generator:
      written += len(chunk)
      out.write(chunk)
  return written
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000197
198
def zip_compress(content_generator, level=7):
  """Reads chunks from |content_generator| and yields zip compressed chunks."""
  zipper = zlib.compressobj(level)
  for piece in content_generator:
    out = zipper.compress(piece)
    if out:
      yield out
  # Flush whatever the compressor still buffers internally.
  remainder = zipper.flush(zlib.Z_FINISH)
  if remainder:
    yield remainder
209
210
def zip_decompress(content_generator, chunk_size=DISK_FILE_CHUNK):
  """Reads zipped data from |content_generator| and yields decompressed data.

  Decompresses data in small chunks (no larger than |chunk_size|) so that
  zip bomb file doesn't cause zlib to preallocate huge amount of memory.

  Raises IOError if data is corrupted or incomplete.
  """
  decompressor = zlib.decompressobj()
  compressed_size = 0
  try:
    for chunk in content_generator:
      compressed_size += len(chunk)
      # Limit the output size per call; any input zlib could not consume yet
      # is kept in decompressor.unconsumed_tail.
      data = decompressor.decompress(chunk, chunk_size)
      if data:
        yield data
      # Drain the buffered input, still no more than chunk_size at a time,
      # before reading the next chunk from the generator.
      while decompressor.unconsumed_tail:
        data = decompressor.decompress(decompressor.unconsumed_tail, chunk_size)
        if data:
          yield data
    # Emit any final bytes held inside the decompressor.
    tail = decompressor.flush()
    if tail:
      yield tail
  except zlib.error as e:
    raise IOError(
        'Corrupted zip stream (read %d bytes) - %s' % (compressed_size, e))
  # Ensure all data was read and decompressed.
  if decompressor.unused_data or decompressor.unconsumed_tail:
    raise IOError('Not all data was decompressed')
240
241
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (store, no compression) for file types that are already
  compressed, 7 otherwise.
  """
  # os.path.splitext() returns the extension WITH the leading dot ('.zip'),
  # while ALREADY_COMPRESSED_TYPES lists bare extensions ('zip'). Strip the
  # dot so the membership test can actually match; otherwise already
  # compressed files would be pointlessly recompressed at level 7.
  file_ext = os.path.splitext(filename)[1].lower().lstrip('.')
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
247
248
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files."""
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect every ancestor directory (relative paths) that must exist.
  to_create = set()
  for f in files:
    path = os.path.dirname(f)
    while path and path not in to_create:
      to_create.add(path)
      path = os.path.dirname(path)
  # Lexicographic order guarantees a parent is created before its children.
  for rel_dir in sorted(to_create):
    os.mkdir(os.path.join(base_directory, rel_dir))
261
262
def create_symlinks(base_directory, files):
  """Creates any symlinks needed by the given set of files."""
  for relpath, properties in files:
    if 'l' not in properties:
      # Not a symlink entry.
      continue
    if sys.platform == 'win32':
      # TODO(maruel): Create symlink via the win32 api.
      logging.warning('Ignoring symlink %s', relpath)
      continue
    destination = os.path.join(base_directory, relpath)
    # os.symlink() doesn't exist on Windows.
    os.symlink(properties['l'], destination)  # pylint: disable=E1101
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000275
276
def is_valid_file(filepath, size):
  """Determines if the given files appears valid.

  Currently it just checks the file's size.
  """
  if size == UNKNOWN_FILE_SIZE:
    # No expected size available; existence is the only possible check.
    return os.path.isfile(filepath)
  on_disk = os.stat(filepath).st_size
  if size == on_disk:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), on_disk, size)
  return False
291
292
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """Thread pool that automatically retries on IOError and runs a preconfigured
  function.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # How many times a task is retried when it raises IOError.
  RETRIES = 5

  def __init__(self):
    # Positional args: exceptions to retry on, retry count, initial workers,
    # max workers, then 0 and 'remote' — presumably the queue size and the
    # thread name prefix; TODO(review): confirm against
    # threading_utils.AutoRetryThreadPool's signature.
    super(WorkerPool, self).__init__(
        [IOError],
        self.RETRIES,
        self.INITIAL_WORKERS,
        self.MAX_WORKERS,
        0,
        'remote')
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000310
311
class Item(object):
  """A single entry that can be uploaded to Storage.

  Digest and size can be passed to the constructor when already known;
  otherwise prepare() derives them from content(). A caller-supplied digest
  MUST have been produced with the hash algorithm the Storage uses.

  When used with Storage, an Item starts its life in a main thread, travels
  to 'contains' thread, then to 'push' thread and then finally back to
  the main thread. It is never used concurrently from multiple threads.
  """

  def __init__(self, digest=None, size=None, high_priority=False):
    self.digest = digest
    self.size = size
    self.high_priority = high_priority
    # Default zlib level; subclasses may override per item.
    self.compression_level = 6

  def content(self):
    """Iterable with content of this item as byte string (str) chunks."""
    raise NotImplementedError()

  def prepare(self, hash_algo):
    """Ensures self.digest and self.size are set.

    Uses content() as a source of data to calculate them. Does nothing if
    both are already known.

    Arguments:
      hash_algo: hash algorithm to use to calculate digest.
    """
    if self.digest is not None and self.size is not None:
      return
    hasher = hash_algo()
    byte_count = 0
    for chunk in self.content():
      hasher.update(chunk)
      byte_count += len(chunk)
    self.digest = hasher.hexdigest()
    self.size = byte_count
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000351
352
class FileItem(Item):
  """A file on disk to push to Storage.

  Digest and size can be passed to the constructor when already known;
  otherwise they are derived from the file content (size eagerly via
  os.stat, digest lazily in prepare()).
  """

  def __init__(self, path, digest=None, size=None, high_priority=False):
    if size is None:
      size = os.stat(path).st_size
    super(FileItem, self).__init__(digest, size, high_priority)
    self.path = path
    self.compression_level = get_zip_compression_level(path)

  def content(self):
    return file_read(self.path)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000370
371
class BufferItem(Item):
  """An in-memory byte buffer to push to Storage.

  The size is known upfront (the buffer length); the digest is computed
  lazily by prepare().
  """

  def __init__(self, buf, high_priority=False):
    super(BufferItem, self).__init__(
        digest=None, size=len(buf), high_priority=high_priority)
    self.buffer = buf

  def content(self):
    # The whole buffer as a single chunk.
    return [self.buffer]
381
382
class Storage(object):
  """Efficiently downloads or uploads large set of files via StorageApi.

  Implements compression support, parallel 'contains' checks, parallel uploads
  and more.

  Works only within single namespace (and thus hashing algorithm and compression
  scheme are fixed).

  Spawns multiple internal threads. Thread safe, but not fork safe.
  """

  def __init__(self, storage_api):
    self._storage_api = storage_api
    # Whether data is gzipped before upload / after download; decided from
    # the namespace name.
    self._use_zip = is_namespace_with_compression(storage_api.namespace)
    self._hash_algo = get_hash_algo(storage_api.namespace)
    # Thread pools are created lazily on first use (see properties below).
    self._cpu_thread_pool = None
    self._net_thread_pool = None

  @property
  def hash_algo(self):
    """Hashing algorithm used to name files in storage based on their content.

    Defined by |namespace|. See also 'get_hash_algo'.
    """
    return self._hash_algo

  @property
  def location(self):
    """Location of a backing store that this class is using.

    Exact meaning depends on the storage_api type. For IsolateServer it is
    an URL of isolate server, for FileSystem is it a path in file system.
    """
    return self._storage_api.location

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    return self._storage_api.namespace

  @property
  def cpu_thread_pool(self):
    """ThreadPool for CPU-bound tasks like zipping."""
    if self._cpu_thread_pool is None:
      self._cpu_thread_pool = threading_utils.ThreadPool(
          2, max(threading_utils.num_processors(), 2), 0, 'zip')
    return self._cpu_thread_pool

  @property
  def net_thread_pool(self):
    """AutoRetryThreadPool for IO-bound tasks, retries IOError."""
    if self._net_thread_pool is None:
      self._net_thread_pool = WorkerPool()
    return self._net_thread_pool

  def close(self):
    """Waits for all pending tasks to finish."""
    if self._cpu_thread_pool:
      self._cpu_thread_pool.join()
      self._cpu_thread_pool.close()
      self._cpu_thread_pool = None
    if self._net_thread_pool:
      self._net_thread_pool.join()
      self._net_thread_pool.close()
      self._net_thread_pool = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exc_value, _traceback):
    """Context manager interface."""
    self.close()
    return False

  def upload_items(self, items):
    """Uploads a bunch of items to the isolate server.

    It figures out what items are missing from the server and uploads only them.

    Arguments:
      items: list of Item instances that represents data to upload.

    Returns:
      List of items that were uploaded. All other items are already there.
    """
    # TODO(vadimsh): Optimize special case of len(items) == 1 that is frequently
    # used by swarming.py. There's no need to spawn multiple threads and try to
    # do stuff in parallel: there's nothing to parallelize. 'contains' check and
    # 'push' should be performed sequentially in the context of current thread.

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # For each digest keep only first Item that matches it. All other items
    # are just indistinguishable copies from the point of view of isolate
    # server (it doesn't care about paths at all, only content and digests).
    seen = {}
    duplicates = 0
    for item in items:
      if seen.setdefault(item.digest, item) is not item:
        duplicates += 1
    items = seen.values()
    if duplicates:
      logging.info('Skipped %d duplicated files', duplicates)

    # Enqueue all upload tasks.
    missing = set()
    uploaded = []
    channel = threading_utils.TaskChannel()
    for missing_item, push_state in self.get_missing_items(items):
      missing.add(missing_item)
      self.async_push(channel, missing_item, push_state)

    # No need to spawn deadlock detector thread if there's nothing to upload.
    if missing:
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        # Wait for all started uploads to finish.
        while len(uploaded) != len(missing):
          detector.ping()
          item = channel.pull()
          uploaded.append(item)
          logging.debug(
              'Uploaded %d / %d: %s', len(uploaded), len(missing), item.digest)
      logging.info('All files are uploaded')

    # Print stats.
    total = len(items)
    total_size = sum(f.size for f in items)
    logging.info(
        'Total: %6d, %9.1fkb',
        total,
        total_size / 1024.)
    cache_hit = set(items) - missing
    cache_hit_size = sum(f.size for f in cache_hit)
    logging.info(
        'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_hit),
        cache_hit_size / 1024.,
        len(cache_hit) * 100. / total,
        cache_hit_size * 100. / total_size if total_size else 0)
    cache_miss = missing
    cache_miss_size = sum(f.size for f in cache_miss)
    logging.info(
        'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_miss),
        cache_miss_size / 1024.,
        len(cache_miss) * 100. / total,
        cache_miss_size * 100. / total_size if total_size else 0)

    return uploaded

  def get_fetch_url(self, item):
    """Returns an URL that can be used to fetch given item once it's uploaded.

    Note that if namespace uses compression, data at given URL is compressed.

    Arguments:
      item: Item to get fetch URL for.

    Returns:
      An URL or None if underlying protocol doesn't support this.
    """
    item.prepare(self._hash_algo)
    return self._storage_api.get_fetch_url(item.digest)

  def async_push(self, channel, item, push_state):
    """Starts asynchronous push to the server in a parallel thread.

    Can be used only after |item| was checked for presence on a server with
    'get_missing_items' call. 'get_missing_items' returns |push_state| object
    that contains storage specific information describing how to upload
    the item (for example in case of cloud storage, it is signed upload URLs).

    Arguments:
      channel: TaskChannel that receives back |item| when upload ends.
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      None, but |channel| later receives back |item| when upload ends.
    """
    # Thread pool task priority.
    priority = WorkerPool.HIGH if item.high_priority else WorkerPool.MED

    def push(content):
      """Pushes an Item and returns it to |channel|."""
      item.prepare(self._hash_algo)
      self._storage_api.push(item, push_state, content)
      return item

    # If zipping is not required, just start a push task.
    if not self._use_zip:
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, item.content())
      return

    # If zipping is enabled, zip in a separate thread.
    def zip_and_push():
      # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
      # content right here. It will block until all file is zipped.
      try:
        stream = zip_compress(item.content(), item.compression_level)
        data = ''.join(stream)
      except Exception as exc:
        logging.error('Failed to zip \'%s\': %s', item, exc)
        channel.send_exception()
        return
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, [data])
    self.cpu_thread_pool.add_task(priority, zip_and_push)

  def push(self, item, push_state):
    """Synchronously pushes a single item to the server.

    If you need to push many items at once, consider using 'upload_items' or
    'async_push' with instance of TaskChannel.

    Arguments:
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      Pushed item (same object as |item|).
    """
    channel = threading_utils.TaskChannel()
    with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT):
      self.async_push(channel, item, push_state)
      pushed = channel.pull()
      assert pushed is item
    return item

  def async_fetch(self, channel, priority, digest, size, sink):
    """Starts asynchronous fetch from the server in a parallel thread.

    Arguments:
      channel: TaskChannel that receives back |digest| when download ends.
      priority: thread pool task priority for the fetch.
      digest: hex digest of an item to download.
      size: expected size of the item (after decompression).
      sink: function that will be called as sink(generator).
    """
    def fetch():
      try:
        # Prepare reading pipeline.
        stream = self._storage_api.fetch(digest)
        if self._use_zip:
          stream = zip_decompress(stream, DISK_FILE_CHUNK)
        # Run |stream| through verifier that will assert its size.
        verifier = FetchStreamVerifier(stream, size)
        # Verified stream goes to |sink|.
        sink(verifier.run())
      except Exception as err:
        logging.error('Failed to fetch %s: %s', digest, err)
        raise
      return digest

    # Don't bother with zip_thread_pool for decompression. Decompression is
    # really fast and most probably IO bound anyway.
    self.net_thread_pool.add_task_with_channel(channel, priority, fetch)

  def get_missing_items(self, items):
    """Yields items that are missing from the server.

    Issues multiple parallel queries via StorageApi's 'contains' method.

    Arguments:
      items: a list of Item objects to check.

    Yields:
      For each missing item it yields a pair (item, push_state), where:
        * item - Item object that is missing (one of |items|).
        * push_state - opaque object that contains storage specific information
            describing how to upload the item (for example in case of cloud
            storage, it is signed upload URLs). It can later be passed to
            'async_push'.
    """
    channel = threading_utils.TaskChannel()
    pending = 0

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # Enqueue all requests.
    for batch in batch_items_for_check(items):
      self.net_thread_pool.add_task_with_channel(channel, WorkerPool.HIGH,
          self._storage_api.contains, batch)
      pending += 1

    # Yield results as they come in.
    for _ in xrange(pending):
      for missing_item, push_state in channel.pull().iteritems():
        yield missing_item, push_state
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000682
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000683
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800684def batch_items_for_check(items):
685 """Splits list of items to check for existence on the server into batches.
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000686
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800687 Each batch corresponds to a single 'exists?' query to the server via a call
688 to StorageApi's 'contains' method.
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000689
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800690 Arguments:
691 items: a list of Item objects.
692
693 Yields:
694 Batches of items to query for existence in a single operation,
695 each batch is a list of Item objects.
696 """
697 batch_count = 0
698 batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[0]
699 next_queries = []
700 for item in sorted(items, key=lambda x: x.size, reverse=True):
701 next_queries.append(item)
702 if len(next_queries) == batch_size_limit:
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000703 yield next_queries
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800704 next_queries = []
705 batch_count += 1
706 batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[
707 min(batch_count, len(ITEMS_PER_CONTAINS_QUERIES) - 1)]
708 if next_queries:
709 yield next_queries
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000710
711
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +0000712class FetchQueue(object):
713 """Fetches items from Storage and places them into LocalCache.
714
715 It manages multiple concurrent fetch operations. Acts as a bridge between
716 Storage and LocalCache so that Storage and LocalCache don't depend on each
717 other at all.
718 """
719
720 def __init__(self, storage, cache):
721 self.storage = storage
722 self.cache = cache
723 self._channel = threading_utils.TaskChannel()
724 self._pending = set()
725 self._accessed = set()
726 self._fetched = cache.cached_set()
727
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800728 def add(self, digest, size=UNKNOWN_FILE_SIZE, priority=WorkerPool.MED):
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +0000729 """Starts asynchronous fetch of item |digest|."""
730 # Fetching it now?
731 if digest in self._pending:
732 return
733
734 # Mark this file as in use, verify_all_cached will later ensure it is still
735 # in cache.
736 self._accessed.add(digest)
737
738 # Already fetched? Notify cache to update item's LRU position.
739 if digest in self._fetched:
740 # 'touch' returns True if item is in cache and not corrupted.
741 if self.cache.touch(digest, size):
742 return
743 # Item is corrupted, remove it from cache and fetch it again.
744 self._fetched.remove(digest)
745 self.cache.evict(digest)
746
747 # TODO(maruel): It should look at the free disk space, the current cache
748 # size and the size of the new item on every new item:
749 # - Trim the cache as more entries are listed when free disk space is low,
750 # otherwise if the amount of data downloaded during the run > free disk
751 # space, it'll crash.
752 # - Make sure there's enough free disk space to fit all dependencies of
753 # this run! If not, abort early.
754
755 # Start fetching.
756 self._pending.add(digest)
757 self.storage.async_fetch(
758 self._channel, priority, digest, size,
759 functools.partial(self.cache.write, digest))
760
761 def wait(self, digests):
762 """Starts a loop that waits for at least one of |digests| to be retrieved.
763
764 Returns the first digest retrieved.
765 """
766 # Flush any already fetched items.
767 for digest in digests:
768 if digest in self._fetched:
769 return digest
770
771 # Ensure all requested items are being fetched now.
772 assert all(digest in self._pending for digest in digests), (
773 digests, self._pending)
774
775 # Wait for some requested item to finish fetching.
776 while self._pending:
777 digest = self._channel.pull()
778 self._pending.remove(digest)
779 self._fetched.add(digest)
780 if digest in digests:
781 return digest
782
783 # Should never reach this point due to assert above.
784 raise RuntimeError('Impossible state')
785
786 def inject_local_file(self, path, algo):
787 """Adds local file to the cache as if it was fetched from storage."""
788 with open(path, 'rb') as f:
789 data = f.read()
790 digest = algo(data).hexdigest()
791 self.cache.write(digest, [data])
792 self._fetched.add(digest)
793 return digest
794
795 @property
796 def pending_count(self):
797 """Returns number of items to be fetched."""
798 return len(self._pending)
799
800 def verify_all_cached(self):
801 """True if all accessed items are in cache."""
802 return self._accessed.issubset(self.cache.cached_set())
803
804
class FetchStreamVerifier(object):
  """Verifies that fetched file is valid before passing it to the LocalCache."""

  def __init__(self, stream, expected_size):
    # stream: iterable yielding chunks (str) of the downloaded item.
    # expected_size: size in bytes, or UNKNOWN_FILE_SIZE to skip the check.
    self.stream = stream
    self.expected_size = expected_size
    self.current_size = 0

  def run(self):
    """Generator that yields same items as |stream|.

    Verifies |stream| is complete before yielding a last chunk to consumer.

    Also wraps IOError produced by consumer into MappingError exceptions since
    otherwise Storage will retry fetch on unrelated local cache errors.
    """
    # Read one chunk ahead, keep it in |stored|.
    # That way a complete stream can be verified before pushing last chunk
    # to consumer.
    stored = None
    for chunk in self.stream:
      assert chunk is not None
      if stored is not None:
        self._inspect_chunk(stored, is_last=False)
        try:
          yield stored
        except IOError as exc:
          raise MappingError('Failed to store an item in cache: %s' % exc)
      stored = chunk
    if stored is not None:
      self._inspect_chunk(stored, is_last=True)
      try:
        yield stored
      except IOError as exc:
        raise MappingError('Failed to store an item in cache: %s' % exc)
    elif self.expected_size not in (0, UNKNOWN_FILE_SIZE):
      # The stream yielded no chunks at all. Previously this skipped size
      # verification entirely, silently accepting a truncated (empty) fetch
      # of a non-empty file. Treat it as an incomplete download instead.
      raise IOError('Incorrect file size: expected %d, got 0' %
          self.expected_size)

  def _inspect_chunk(self, chunk, is_last):
    """Called for each fetched chunk before passing it to consumer."""
    self.current_size += len(chunk)
    if (is_last and (self.expected_size != UNKNOWN_FILE_SIZE) and
        (self.expected_size != self.current_size)):
      raise IOError('Incorrect file size: expected %d, got %d' % (
          self.expected_size, self.current_size))
848
849
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000850class StorageApi(object):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800851 """Interface for classes that implement low-level storage operations.
852
853 StorageApi is oblivious of compression and hashing scheme used. This details
854 are handled in higher level Storage class.
855
856 Clients should generally not use StorageApi directly. Storage class is
857 preferred since it implements compression and upload optimizations.
858 """
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000859
Vadim Shtayurae0ab1902014-04-29 10:55:27 -0700860 @property
861 def location(self):
862 """Location of a backing store that this class is using.
863
864 Exact meaning depends on the type. For IsolateServer it is an URL of isolate
865 server, for FileSystem is it a path in file system.
866 """
867 raise NotImplementedError()
868
869 @property
870 def namespace(self):
871 """Isolate namespace used by this storage.
872
873 Indirectly defines hashing scheme and compression method used.
874 """
875 raise NotImplementedError()
876
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +0000877 def get_fetch_url(self, digest):
878 """Returns an URL that can be used to fetch an item with given digest.
879
880 Arguments:
881 digest: hex digest of item to fetch.
882
883 Returns:
884 An URL or None if the protocol doesn't support this.
885 """
886 raise NotImplementedError()
887
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -0800888 def fetch(self, digest, offset=0):
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000889 """Fetches an object and yields its content.
890
891 Arguments:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000892 digest: hash digest of item to download.
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -0800893 offset: offset (in bytes) from the start of the file to resume fetch from.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000894
895 Yields:
896 Chunks of downloaded item (as str objects).
897 """
898 raise NotImplementedError()
899
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800900 def push(self, item, push_state, content=None):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000901 """Uploads an |item| with content generated by |content| generator.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000902
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800903 |item| MUST go through 'contains' call to get |push_state| before it can
904 be pushed to the storage.
905
906 To be clear, here is one possible usage:
907 all_items = [... all items to push as Item subclasses ...]
908 for missing_item, push_state in storage_api.contains(all_items).items():
909 storage_api.push(missing_item, push_state)
910
911 When pushing to a namespace with compression, data that should be pushed
912 and data provided by the item is not the same. In that case |content| is
913 not None and it yields chunks of compressed data (using item.content() as
914 a source of original uncompressed data). This is implemented by Storage
915 class.
916
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000917 Arguments:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000918 item: Item object that holds information about an item being pushed.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800919 push_state: push state object as returned by 'contains' call.
920 content: a generator that yields chunks to push, item.content() if None.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000921
922 Returns:
923 None.
924 """
925 raise NotImplementedError()
926
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000927 def contains(self, items):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800928 """Checks for |items| on the server, prepares missing ones for upload.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000929
930 Arguments:
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800931 items: list of Item objects to check for presence.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000932
933 Returns:
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800934 A dict missing Item -> opaque push state object to be passed to 'push'.
935 See doc string for 'push'.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000936 """
937 raise NotImplementedError()
938
939
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800940class _IsolateServerPushState(object):
941 """Per-item state passed from IsolateServer.contains to IsolateServer.push.
Mike Frysinger27f03da2014-02-12 16:47:01 -0500942
943 Note this needs to be a global class to support pickling.
944 """
945
946 def __init__(self, upload_url, finalize_url):
947 self.upload_url = upload_url
948 self.finalize_url = finalize_url
949 self.uploaded = False
950 self.finalized = False
951
952
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000953class IsolateServer(StorageApi):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000954 """StorageApi implementation that downloads and uploads to Isolate Server.
955
956 It uploads and downloads directly from Google Storage whenever appropriate.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800957 Works only within single namespace.
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000958 """
959
maruel@chromium.org3e42ce82013-09-12 18:36:59 +0000960 def __init__(self, base_url, namespace):
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000961 super(IsolateServer, self).__init__()
maruel@chromium.org3e42ce82013-09-12 18:36:59 +0000962 assert base_url.startswith('http'), base_url
Vadim Shtayurae0ab1902014-04-29 10:55:27 -0700963 self._base_url = base_url.rstrip('/')
964 self._namespace = namespace
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000965 self._lock = threading.Lock()
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000966 self._server_caps = None
967
968 @staticmethod
969 def _generate_handshake_request():
970 """Returns a dict to be sent as handshake request body."""
971 # TODO(vadimsh): Set 'pusher' and 'fetcher' according to intended usage.
972 return {
973 'client_app_version': __version__,
974 'fetcher': True,
975 'protocol_version': ISOLATE_PROTOCOL_VERSION,
976 'pusher': True,
977 }
978
979 @staticmethod
980 def _validate_handshake_response(caps):
981 """Validates and normalizes handshake response."""
982 logging.info('Protocol version: %s', caps['protocol_version'])
983 logging.info('Server version: %s', caps['server_app_version'])
984 if caps.get('error'):
985 raise MappingError(caps['error'])
986 if not caps['access_token']:
987 raise ValueError('access_token is missing')
988 return caps
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000989
990 @property
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000991 def _server_capabilities(self):
992 """Performs handshake with the server if not yet done.
993
994 Returns:
995 Server capabilities dictionary as returned by /handshake endpoint.
996
997 Raises:
998 MappingError if server rejects the handshake.
999 """
maruel@chromium.org3e42ce82013-09-12 18:36:59 +00001000 # TODO(maruel): Make this request much earlier asynchronously while the
1001 # files are being enumerated.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001002
1003 # TODO(vadimsh): Put |namespace| in the URL so that server can apply
1004 # namespace-level ACLs to this call.
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001005 with self._lock:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001006 if self._server_caps is None:
1007 request_body = json.dumps(
1008 self._generate_handshake_request(), separators=(',', ':'))
1009 response = net.url_read(
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001010 url=self._base_url + '/content-gs/handshake',
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001011 data=request_body,
1012 content_type='application/json',
1013 method='POST')
1014 if response is None:
1015 raise MappingError('Failed to perform handshake.')
1016 try:
1017 caps = json.loads(response)
1018 if not isinstance(caps, dict):
1019 raise ValueError('Expecting JSON dict')
1020 self._server_caps = self._validate_handshake_response(caps)
1021 except (ValueError, KeyError, TypeError) as exc:
1022 # KeyError exception has very confusing str conversion: it's just a
1023 # missing key value and nothing else. So print exception class name
1024 # as well.
1025 raise MappingError('Invalid handshake response (%s): %s' % (
1026 exc.__class__.__name__, exc))
1027 return self._server_caps
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001028
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001029 @property
1030 def location(self):
1031 return self._base_url
1032
1033 @property
1034 def namespace(self):
1035 return self._namespace
1036
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +00001037 def get_fetch_url(self, digest):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001038 assert isinstance(digest, basestring)
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +00001039 return '%s/content-gs/retrieve/%s/%s' % (
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001040 self._base_url, self._namespace, digest)
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +00001041
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001042 def fetch(self, digest, offset=0):
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +00001043 source_url = self.get_fetch_url(digest)
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001044 logging.debug('download_file(%s, %d)', source_url, offset)
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001045
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001046 connection = net.url_open(
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001047 source_url,
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001048 read_timeout=DOWNLOAD_READ_TIMEOUT,
1049 headers={'Range': 'bytes=%d-' % offset} if offset else None)
1050
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001051 if not connection:
Vadim Shtayurae34e13a2014-02-02 11:23:26 -08001052 raise IOError('Request failed - %s' % source_url)
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001053
1054 # If |offset| is used, verify server respects it by checking Content-Range.
1055 if offset:
1056 content_range = connection.get_header('Content-Range')
1057 if not content_range:
1058 raise IOError('Missing Content-Range header')
1059
1060 # 'Content-Range' format is 'bytes <offset>-<last_byte_index>/<size>'.
1061 # According to a spec, <size> can be '*' meaning "Total size of the file
1062 # is not known in advance".
1063 try:
1064 match = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', content_range)
1065 if not match:
1066 raise ValueError()
1067 content_offset = int(match.group(1))
1068 last_byte_index = int(match.group(2))
1069 size = None if match.group(3) == '*' else int(match.group(3))
1070 except ValueError:
1071 raise IOError('Invalid Content-Range header: %s' % content_range)
1072
1073 # Ensure returned offset equals requested one.
1074 if offset != content_offset:
1075 raise IOError('Expecting offset %d, got %d (Content-Range is %s)' % (
1076 offset, content_offset, content_range))
1077
1078 # Ensure entire tail of the file is returned.
1079 if size is not None and last_byte_index + 1 != size:
1080 raise IOError('Incomplete response. Content-Range: %s' % content_range)
1081
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001082 return stream_read(connection, NET_IO_FILE_CHUNK)
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001083
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001084 def push(self, item, push_state, content=None):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001085 assert isinstance(item, Item)
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001086 assert item.digest is not None
1087 assert item.size is not None
1088 assert isinstance(push_state, _IsolateServerPushState)
1089 assert not push_state.finalized
1090
1091 # Default to item.content().
1092 content = item.content() if content is None else content
1093
1094 # Do not iterate byte by byte over 'str'. Push it all as a single chunk.
1095 if isinstance(content, basestring):
1096 assert not isinstance(content, unicode), 'Unicode string is not allowed'
1097 content = [content]
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001098
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001099 # TODO(vadimsh): Do not read from |content| generator when retrying push.
1100 # If |content| is indeed a generator, it can not be re-winded back
1101 # to the beginning of the stream. A retry will find it exhausted. A possible
1102 # solution is to wrap |content| generator with some sort of caching
1103 # restartable generator. It should be done alongside streaming support
1104 # implementation.
1105
1106 # This push operation may be a retry after failed finalization call below,
1107 # no need to reupload contents in that case.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001108 if not push_state.uploaded:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001109 # A cheezy way to avoid memcpy of (possibly huge) file, until streaming
1110 # upload support is implemented.
1111 if isinstance(content, list) and len(content) == 1:
1112 content = content[0]
1113 else:
1114 content = ''.join(content)
1115 # PUT file to |upload_url|.
1116 response = net.url_read(
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001117 url=push_state.upload_url,
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001118 data=content,
1119 content_type='application/octet-stream',
1120 method='PUT')
1121 if response is None:
1122 raise IOError('Failed to upload a file %s to %s' % (
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001123 item.digest, push_state.upload_url))
1124 push_state.uploaded = True
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001125 else:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001126 logging.info(
1127 'A file %s already uploaded, retrying finalization only', item.digest)
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001128
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001129 # Optionally notify the server that it's done.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001130 if push_state.finalize_url:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001131 # TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
1132 # send it to isolated server. That way isolate server can verify that
1133 # the data safely reached Google Storage (GS provides MD5 and CRC32C of
1134 # stored files).
1135 response = net.url_read(
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001136 url=push_state.finalize_url,
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001137 data='',
1138 content_type='application/json',
1139 method='POST')
1140 if response is None:
1141 raise IOError('Failed to finalize an upload of %s' % item.digest)
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001142 push_state.finalized = True
maruel@chromium.orgd1e20c92013-09-17 20:54:26 +00001143
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001144 def contains(self, items):
1145 logging.info('Checking existence of %d files...', len(items))
maruel@chromium.orgd1e20c92013-09-17 20:54:26 +00001146
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001147 # Ensure all items were initialized with 'prepare' call. Storage does that.
1148 assert all(i.digest is not None and i.size is not None for i in items)
1149
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001150 # Request body is a json encoded list of dicts.
1151 body = [
1152 {
1153 'h': item.digest,
1154 's': item.size,
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001155 'i': int(item.high_priority),
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001156 } for item in items
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001157 ]
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001158
1159 query_url = '%s/content-gs/pre-upload/%s?token=%s' % (
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001160 self._base_url,
1161 self._namespace,
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001162 urllib.quote(self._server_capabilities['access_token']))
1163 response_body = net.url_read(
1164 url=query_url,
1165 data=json.dumps(body, separators=(',', ':')),
1166 content_type='application/json',
1167 method='POST')
1168 if response_body is None:
1169 raise MappingError('Failed to execute /pre-upload query')
1170
1171 # Response body is a list of push_urls (or null if file is already present).
1172 try:
1173 response = json.loads(response_body)
1174 if not isinstance(response, list):
1175 raise ValueError('Expecting response with json-encoded list')
1176 if len(response) != len(items):
1177 raise ValueError(
1178 'Incorrect number of items in the list, expected %d, '
1179 'but got %d' % (len(items), len(response)))
1180 except ValueError as err:
1181 raise MappingError(
1182 'Invalid response from server: %s, body is %s' % (err, response_body))
1183
1184 # Pick Items that are missing, attach _PushState to them.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001185 missing_items = {}
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001186 for i, push_urls in enumerate(response):
1187 if push_urls:
1188 assert len(push_urls) == 2, str(push_urls)
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001189 missing_items[items[i]] = _IsolateServerPushState(
1190 push_urls[0], push_urls[1])
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001191 logging.info('Queried %d files, %d cache hit',
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001192 len(items), len(items) - len(missing_items))
1193 return missing_items
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001194
1195
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001196class FileSystem(StorageApi):
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001197 """StorageApi implementation that fetches data from the file system.
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001198
1199 The common use case is a NFS/CIFS file server that is mounted locally that is
1200 used to fetch the file on a local partition.
1201 """
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001202
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001203 # Used for push_state instead of None. That way caller is forced to
1204 # call 'contains' before 'push'. Naively passing None in 'push' will not work.
1205 _DUMMY_PUSH_STATE = object()
1206
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001207 def __init__(self, base_path, namespace):
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001208 super(FileSystem, self).__init__()
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001209 self._base_path = base_path
1210 self._namespace = namespace
1211
1212 @property
1213 def location(self):
1214 return self._base_path
1215
1216 @property
1217 def namespace(self):
1218 return self._namespace
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001219
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +00001220 def get_fetch_url(self, digest):
1221 return None
1222
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001223 def fetch(self, digest, offset=0):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001224 assert isinstance(digest, basestring)
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001225 return file_read(os.path.join(self._base_path, digest), offset=offset)
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001226
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001227 def push(self, item, push_state, content=None):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001228 assert isinstance(item, Item)
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001229 assert item.digest is not None
1230 assert item.size is not None
1231 assert push_state is self._DUMMY_PUSH_STATE
1232 content = item.content() if content is None else content
1233 if isinstance(content, basestring):
1234 assert not isinstance(content, unicode), 'Unicode string is not allowed'
1235 content = [content]
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001236 file_write(os.path.join(self._base_path, item.digest), content)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001237
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001238 def contains(self, items):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001239 assert all(i.digest is not None and i.size is not None for i in items)
1240 return dict(
1241 (item, self._DUMMY_PUSH_STATE) for item in items
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001242 if not os.path.exists(os.path.join(self._base_path, item.digest))
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001243 )
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001244
1245
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001246class LocalCache(object):
1247 """Local cache that stores objects fetched via Storage.
1248
1249 It can be accessed concurrently from multiple threads, so it should protect
1250 its internal state with some lock.
1251 """
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -05001252 cache_dir = None
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001253
1254 def __enter__(self):
1255 """Context manager interface."""
1256 return self
1257
1258 def __exit__(self, _exc_type, _exec_value, _traceback):
1259 """Context manager interface."""
1260 return False
1261
1262 def cached_set(self):
1263 """Returns a set of all cached digests (always a new object)."""
1264 raise NotImplementedError()
1265
1266 def touch(self, digest, size):
1267 """Ensures item is not corrupted and updates its LRU position.
1268
1269 Arguments:
1270 digest: hash digest of item to check.
1271 size: expected size of this item.
1272
1273 Returns:
1274 True if item is in cache and not corrupted.
1275 """
1276 raise NotImplementedError()
1277
1278 def evict(self, digest):
1279 """Removes item from cache if it's there."""
1280 raise NotImplementedError()
1281
1282 def read(self, digest):
1283 """Returns contents of the cached item as a single str."""
1284 raise NotImplementedError()
1285
1286 def write(self, digest, content):
1287 """Reads data from |content| generator and stores it in cache."""
1288 raise NotImplementedError()
1289
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001290 def hardlink(self, digest, dest, file_mode):
1291 """Ensures file at |dest| has same content as cached |digest|.
1292
1293 If file_mode is provided, it is used to set the executable bit if
1294 applicable.
1295 """
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001296 raise NotImplementedError()
1297
1298
class MemoryCache(LocalCache):
  """LocalCache implementation that stores everything in memory."""

  def __init__(self, file_mode_mask=0o500):
    """Args:
      file_mode_mask: bit mask to AND file mode with. Default value will make
          all mapped files to be read only.
    """
    # NOTE: the default was written as the legacy octal literal 0500, which is
    # a syntax error under Python 3; 0o500 is the same value (r-x for owner)
    # and is accepted by Python 2.6+ as well.
    super(MemoryCache, self).__init__()
    self._file_mode_mask = file_mode_mask
    # Let's not assume dict is thread safe.
    self._lock = threading.Lock()
    # Maps digest -> item content (str).
    self._contents = {}

  def cached_set(self):
    """Returns a new set of all digests currently held in memory."""
    with self._lock:
      return set(self._contents)

  def touch(self, digest, size):
    """Returns True if |digest| is present; in-memory data is never stale."""
    with self._lock:
      return digest in self._contents

  def evict(self, digest):
    """Removes |digest| from the cache; no-op when it is absent."""
    with self._lock:
      self._contents.pop(digest, None)

  def read(self, digest):
    """Returns contents of the cached item as a single str.

    Raises:
      KeyError if |digest| is not cached.
    """
    with self._lock:
      return self._contents[digest]

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache."""
    # Assemble whole stream before taking the lock.
    data = ''.join(content)
    with self._lock:
      self._contents[digest] = data

  def hardlink(self, digest, dest, file_mode):
    """Since data is kept in memory, there is no filenode to hardlink."""
    file_write(dest, [self.read(digest)])
    if file_mode is not None:
      # Preserve the requested mode, masked down (read-only by default).
      os.chmod(dest, file_mode & self._file_mode_mask)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001340
1341
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001342def get_hash_algo(_namespace):
1343 """Return hash algorithm class to use when uploading to given |namespace|."""
1344 # TODO(vadimsh): Implement this at some point.
1345 return hashlib.sha1
1346
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001347
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001348def is_namespace_with_compression(namespace):
1349 """Returns True if given |namespace| stores compressed objects."""
1350 return namespace.endswith(('-gzip', '-deflate'))
1351
1352
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001353def get_storage_api(file_or_url, namespace):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001354 """Returns an object that implements low-level StorageApi interface.
1355
1356 It is used by Storage to work with single isolate |namespace|. It should
1357 rarely be used directly by clients, see 'get_storage' for
1358 a better alternative.
1359
1360 Arguments:
1361 file_or_url: a file path to use file system based storage, or URL of isolate
1362 service to use shared cloud based storage.
1363 namespace: isolate namespace to operate in, also defines hashing and
1364 compression scheme used, i.e. namespace names that end with '-gzip'
1365 store compressed data.
1366
1367 Returns:
1368 Instance of StorageApi subclass.
1369 """
Marc-Antoine Ruel37989932013-11-19 16:28:08 -05001370 if file_path.is_url(file_or_url):
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001371 return IsolateServer(file_or_url, namespace)
1372 else:
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001373 return FileSystem(file_or_url, namespace)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001374
1375
def get_storage(file_or_url, namespace):
  """Returns Storage class that can upload and download from |namespace|.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of
        isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of Storage.
  """
  storage_api = get_storage_api(file_or_url, namespace)
  return Storage(storage_api)
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001390
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001391
def expand_symlinks(indir, relfile):
  """Follows symlinks in |relfile|, but treating symlinks that point outside the
  build tree as if they were ordinary directories/files. Returns the final
  symlink-free target and a list of paths to symlinks encountered in the
  process.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory we could support
  that case.

  Arguments:
    indir: root directory of the tree; symlinks resolving outside of it are
        not followed.
    relfile: path relative to |indir| to resolve; a trailing os.path.sep marks
        it as a directory.

  Returns:
    Tuple (relfile, symlinks) where |relfile| is the symlink-free path relative
    to |indir| and |symlinks| is the list of |indir|-relative symlink paths
    that were traversed.

  Raises:
    MappingError: if a symlink target doesn't exist or a recursive symlink
        reference is detected.
  """
  is_directory = relfile.endswith(os.path.sep)
  # Invariant maintained by the loop below: |done| is a symlink-free absolute
  # prefix, |todo| is the remaining relative path still to be scanned.
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
        done, todo)
    if not symlink:
      # No symlink left in |todo|: fix the path case and finish.
      todo = file_path.fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break
    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    target = os.path.normpath(os.path.join(done, pre_symlink))
    symlink_target = os.readlink(symlink_path)
    if os.path.isabs(symlink_target):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = file_path.fix_native_path_case(target, symlink_target)

    if not os.path.exists(target):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not file_path.path_starts_with(indir, target):
      # Target is outside the build tree: treat the symlink itself as an
      # ordinary directory and keep scanning past it.
      done = symlink_path
      todo = post_symlink
      continue
    if file_path.path_starts_with(target, symlink_path):
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))
    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    target = target.split(os.path.sep)
    symlink_path = symlink_path.split(os.path.sep)
    prefix_length = 0
    for target_piece, symlink_path_piece in zip(target, symlink_path):
      if target_piece == symlink_path_piece:
        prefix_length += 1
      else:
        break
    done = os.path.sep.join(target[:prefix_length])
    todo = os.path.join(
        os.path.sep.join(target[prefix_length:]), post_symlink)

  relfile = os.path.relpath(done, indir)
  # Re-append the trailing separator for directories; bool * str yields the
  # separator when is_directory is True and '' otherwise.
  relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
  return relfile, symlinks
1462
1463
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: root directory all outputs are relative to.
    relfile: path relative to |indir| to expand; a trailing os.path.sep marks
        a directory to be expanded recursively.
    blacklist: optional callable taking a relative path and returning True for
        entries to skip during directory expansion. Explicitly listed files
        are included even if blacklisted.
    follow_symlinks: if True, symlinks are resolved via expand_symlinks() and
        included in the output.

  Returns:
    List of paths relative to |indir|, including any traversed symlinks.

  Raises:
    MappingError: on absolute or out-of-tree paths, path case mismatches,
        missing files or trailing-slash inconsistencies.
  """
  if os.path.isabs(relfile):
    raise MappingError('Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + '.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      # ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree. We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    relfile, symlinks = expand_symlinks(indir, relfile)

  if relfile.endswith(os.path.sep):
    if not os.path.isdir(infile):
      raise MappingError(
          '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith('.' + os.path.sep):
      relfile = relfile[2:]
    # Note: |outfiles| aliases |symlinks| on purpose; the traversed symlinks
    # are part of this directory's output.
    outfiles = symlinks
    try:
      for filename in os.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if os.path.isdir(os.path.join(indir, inner_relfile)):
          # Mark nested directories with a trailing separator so the recursive
          # call expands them too.
          inner_relfile += os.path.sep
        outfiles.extend(
            expand_directory_and_symlink(indir, inner_relfile, blacklist,
                                         follow_symlinks))
      return outfiles
    except OSError as e:
      raise MappingError(
          'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise MappingError(
          'Input directory %s must have a trailing slash' % infile)

    if not os.path.isfile(infile):
      raise MappingError('Input file %s doesn\'t exist' % infile)

    return symlinks + [relfile]
1542
1543
def process_input(filepath, prevdict, read_only, algo):
  """Processes an input file, a dependency, and return meta data about it.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calcultate the SHA-1 of the file's
    content if the path points to a file and not a symlink.

  Arguments:
    filepath: File to act on.
    prevdict: the previous dictionary. It is used to retrieve the cached sha-1
              to skip recalculating the hash. Optional.
    read_only: If 1 or 2, the file mode is manipulated. In practice, only save
               one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
               windows, mode is not set since all files are 'executable' by
               default.
    algo:      Hashing algorithm used.

  Returns:
    The necessary data to create a entry in the 'files' section of an .isolated
    file.
  """
  out = {}
  # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
  # if prevdict.get('T') == True:
  #   # The file's content is ignored. Skip the time and hard code mode.
  #   out['s'] = 0
  #   out['h'] = algo().hexdigest()
  #   out['T'] = True
  #   return out

  # lstat() is used (not stat) so symlinks are described, not followed. The
  # timestamp decides whether cached hash / link data in |prevdict| can be
  # reused; a manually reset mtime with changed content defeats this, which is
  # accepted.
  try:
    stats = os.lstat(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(stats.st_mode)

  if sys.platform != 'win32':
    # File mode is not meaningful on Windows, so it is only recorded elsewhere.
    mode = stat.S_IMODE(stats.st_mode)
    # Strip group write and all 'other' permissions.
    mode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      mode &= ~stat.S_IWUSR
    # Mirror the owner's execute bit onto the group.
    if mode & stat.S_IXUSR:
      mode |= stat.S_IXGRP
    else:
      mode &= ~stat.S_IXGRP
    if not is_link:
      out['m'] = mode

  # Most recent update time, used to detect staleness of cached metadata.
  # TODO(maruel): Save it in the .state file instead of .isolated so the
  # .isolated file is deterministic.
  out['t'] = int(round(stats.st_mtime))

  if is_link:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do
      # again here.
      symlink_value = os.readlink(filepath)  # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = file_path.fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  else:
    out['s'] = stats.st_size
    # If neither timestamp nor size changed, reuse the cached sha-1.
    if prevdict.get('t') == out['t'] and prevdict.get('s') == out['s']:
      out['h'] = prevdict.get('h')
    if not out.get('h'):
      out['h'] = hash_file(filepath, algo)
  return out
1633
1634
def save_isolated(isolated, data):
  """Writes one or multiple .isolated files.

  Note: this reference implementation does not create child .isolated file so
  it always returns an empty list.

  Returns the list of child isolated files that are included by |isolated|.
  """
  # Round-trip |data| through the parser so invalid content is rejected before
  # anything is written to disk.
  serialized = json.dumps(data)
  load_isolated(serialized, SUPPORTED_ALGOS[data['algo']])
  tools.write_json(isolated, data, True)
  return []
1648
1649
def upload_tree(base_url, indir, infiles, namespace):
  """Uploads the given tree to the given url.

  Arguments:
    base_url: The base url, it is assume that |base_url|/has/ can be used to
              query if an element was already uploaded, and |base_url|/store/
              can be used to upload a new element.
    indir: Root directory the infiles are based in.
    infiles: dict of files to upload from |indir| to |base_url|.
    namespace: The namespace to use on the server.
  """
  logging.info('upload_tree(indir=%s, files=%d)', indir, len(infiles))

  # Build FileItem objects out of |indir| + |infiles|. Symlinks are skipped:
  # they are not represented by items on the isolate server side.
  items = []
  for filepath, metadata in infiles.iteritems():
    if 'l' in metadata:
      continue
    items.append(
        FileItem(
            path=os.path.join(indir, filepath),
            digest=metadata['h'],
            size=metadata['s'],
            high_priority=metadata.get('priority') == '0'))

  with get_storage(base_url, namespace) as storage:
    storage.upload_items(items)
  return 0
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001679
1680
def load_isolated(content, algo):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server.

  Returns:
    The parsed data as a dict, with file paths normalized to the native
    os.path.sep.

  Raises:
    ConfigError: if the content is not a valid .isolated structure.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  value = data.get('version', '1.0')
  if not isinstance(value, basestring):
    raise ConfigError('Expected string, got %r' % value)
  try:
    version = tuple(map(int, value.split('.')))
  except ValueError:
    raise ConfigError('Expected valid version, got %r' % value)

  expected_version = tuple(map(int, ISOLATED_FILE_VERSION.split('.')))
  # Major version must match.
  if version[0] != expected_version[0]:
    raise ConfigError(
        'Expected compatible \'%s\' version, got %r' %
        (ISOLATED_FILE_VERSION, value))

  if algo is None:
    # TODO(maruel): Remove the default around Jan 2014.
    # Default the algorithm used in the .isolated file itself, falls back to
    # 'sha-1' if unspecified.
    algo = SUPPORTED_ALGOS_REVERSE[data.get('algo', 'sha-1')]

  for key, value in data.iteritems():
    if key == 'algo':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise ConfigError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != SUPPORTED_ALGOS_REVERSE[algo]:
        raise ConfigError(
            'Expected \'%s\', got %r' % (SUPPORTED_ALGOS_REVERSE[algo], value))

    elif key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_valid_hash(subsubvalue, algo):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, (int, long)):
              raise ConfigError('Expected int or long, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        # Each file entry must be either a link ('l') or a regular file
        # carrying both a hash ('h') and a size ('s'), never a mix of the two.
        if ('h' in subvalue) == ('l' in subvalue):
          raise ConfigError(
              'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
              subvalue)
        if ('h' in subvalue) != ('s' in subvalue):
          raise ConfigError(
              'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
              subvalue)
        if ('s' in subvalue) == ('l' in subvalue):
          raise ConfigError(
              'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
              subvalue)
        if 'l' in subvalue and 'm' in subvalue:
          raise ConfigError(
              'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_valid_hash(subvalue, algo):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'os':
      if version >= (1, 4):
        raise ConfigError('Key \'os\' is not allowed starting version 1.4')

    elif key == 'read_only':
      if value not in (0, 1, 2):
        raise ConfigError('Expected 0, 1 or 2, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise ConfigError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are
  # always in the native path format, someone could want to download an
  # .isolated tree from another OS.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].iteritems())
    for v in data['files'].itervalues():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data
1824
1825
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo

    # Becomes True once every .isolated file on the left side of the include
    # tree has been parsed. 'Tree' here means the .isolate and all the
    # .isolated files recursively included by it with the 'includes' key. The
    # order of the sha-1s in 'includes' matters: later ones are not processed
    # until the earlier ones are retrieved and read.
    self.can_fetch = False

    # Raw parsed json content of the .isolated file.
    self.data = {}
    # One IsolatedFile instance per entry of self.data['includes'].
    self.children = []

    # Becomes True once load() has parsed the content.
    self._is_parsed = False
    # Becomes True once fetch_files() has enqueued this file's dependencies.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, self.algo)
    self.children = []
    for included_hash in self.data.get('includes', []):
      self.children.append(IsolatedFile(included_hash, self.algo))
    self._is_parsed = True

  def fetch_files(self, fetch_queue, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if not self._is_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        fetch_queue.add(properties['h'], properties['s'], WorkerPool.MED)
    self.files_fetched = True
1883
1884
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command to run, aggregated from the 'command' key; first node wins.
    self.command = []
    # Mapping of relative file path -> properties dict, aggregated over the
    # whole include tree; entries already set are never overwritten.
    self.files = {}
    # Value of the 'read_only' key from the first node that defines it.
    self.read_only = None
    # Value of the 'relative_cwd' key from the first node that defines it.
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, fetch_queue, root_isolated_hash, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.
    """
    self.root = IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      # Enqueues |isolated_file| for download at the highest priority.
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      fetch_queue.add(h, priority=WorkerPool.HIGH)

    retrieve(self.root)

    while pending:
      # Process whichever pending .isolated file finished downloading first.
      item_hash = fetch_queue.wait(pending)
      item = pending.pop(item_hash)
      item.load(fetch_queue.cache.read(item_hash))
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(fetch_queue, self.root)

    def check(n):
      # Sanity check: every node in the tree must have had its files fetched.
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''

  def _traverse_tree(self, fetch_queue, node):
    """Fetches files of every fetchable node and unblocks, per node, the
    left-most child that is not yet marked can_fetch, preserving the strict
    left-to-right 'includes' ordering.
    """
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(fetch_queue, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetchable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(fetch_queue, i)

  def _update_self(self, fetch_queue, node):
    """Enqueues |node|'s files and merges its properties into self; for
    command/read_only/relative_cwd the first node to define a value wins.
    """
    node.fetch_files(fetch_queue, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      # Ensure paths are correctly separated on windows.
      self.command = node.data['command']
      if self.command:
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1979
1980
def fetch_isolated(isolated_hash, storage, cache, outdir, require_command):
  """Aggressively downloads the .isolated file(s), then download all the files.

  Arguments:
    isolated_hash: hash of the root *.isolated file.
    storage: Storage class that communicates with isolate storage.
    cache: LocalCache class that knows how to store and map files locally.
    outdir: Output directory to map file tree to.
    require_command: Ensure *.isolated specifies a command to run.

  Returns:
    Settings object that holds details about loaded *.isolated file.

  Raises:
    ConfigError: if |require_command| is True and no command is specified.
    MappingError: if the input is neither a valid hash nor a readable file, or
        if the cache evicted items that were just fetched.
  """
  logging.debug(
      'fetch_isolated(%s, %s, %s, %s, %s)',
      isolated_hash, storage, cache, outdir, require_command)
  # Hash algorithm to use, defined by namespace |storage| is using.
  algo = storage.hash_algo
  with cache:
    fetch_queue = FetchQueue(storage, cache)
    settings = Settings()

    with tools.Profiler('GetIsolateds'):
      # Optionally support local files by manually adding them to cache.
      if not is_valid_hash(isolated_hash, algo):
        logging.debug('%s is not a valid hash, assuming a file', isolated_hash)
        try:
          isolated_hash = fetch_queue.inject_local_file(isolated_hash, algo)
        except IOError:
          # Typo fixed: 'intent' -> 'intend'.
          raise MappingError(
              '%s doesn\'t seem to be a valid file. Did you intend to pass a '
              'valid hash?' % isolated_hash)

      # Load all *.isolated and start loading rest of the files.
      settings.load(fetch_queue, isolated_hash, algo)
      if require_command and not settings.command:
        # TODO(vadimsh): All fetch operations are already enqueue and there's
        # no easy way to cancel them.
        raise ConfigError('No command to run')

    with tools.Profiler('GetRest'):
      # Create file system hierarchy.
      if not os.path.isdir(outdir):
        os.makedirs(outdir)
      create_directories(outdir, settings.files)
      create_symlinks(outdir, settings.files.iteritems())

      # Ensure working directory exists.
      cwd = os.path.normpath(os.path.join(outdir, settings.relative_cwd))
      if not os.path.isdir(cwd):
        os.makedirs(cwd)

      # Multimap: digest -> list of pairs (path, props).
      remaining = {}
      for filepath, props in settings.files.iteritems():
        if 'h' in props:
          remaining.setdefault(props['h'], []).append((filepath, props))

      # Now block on the remaining files to be downloaded and mapped.
      logging.info('Retrieving remaining files (%d of them)...',
                   fetch_queue.pending_count)
      last_update = time.time()
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        while remaining:
          detector.ping()

          # Wait for any item to finish fetching to cache.
          digest = fetch_queue.wait(remaining)

          # Link corresponding files to a fetched item in cache.
          for filepath, props in remaining.pop(digest):
            cache.hardlink(
                digest, os.path.join(outdir, filepath), props.get('m'))

          # Report progress.
          duration = time.time() - last_update
          if duration > DELAY_BETWEEN_UPDATES_IN_SECS:
            msg = '%d files remaining...' % len(remaining)
            print(msg)
            logging.info(msg)
            last_update = time.time()

    # Cache could evict some items we just tried to fetch, it's a fatal error.
    if not fetch_queue.verify_all_cached():
      raise MappingError('Cache is too small to hold all requested files')
  return settings
2067
2068
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002069def directory_to_metadata(root, algo, blacklist):
2070 """Returns the FileItem list and .isolated metadata for a directory."""
2071 root = file_path.get_native_path_case(root)
Vadim Shtayura439d3fc2014-05-07 16:05:12 -07002072 paths = expand_directory_and_symlink(
2073 root, '.' + os.path.sep, blacklist, sys.platform != 'win32')
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002074 metadata = dict(
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04002075 (relpath, process_input(os.path.join(root, relpath), {}, False, algo))
Vadim Shtayura439d3fc2014-05-07 16:05:12 -07002076 for relpath in paths
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002077 )
2078 for v in metadata.itervalues():
2079 v.pop('t')
2080 items = [
2081 FileItem(
2082 path=os.path.join(root, relpath),
2083 digest=meta['h'],
2084 size=meta['s'],
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08002085 high_priority=relpath.endswith('.isolated'))
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002086 for relpath, meta in metadata.iteritems() if 'h' in meta
2087 ]
2088 return items, metadata
2089
2090
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002091def archive_files_to_storage(storage, files, blacklist):
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -05002092 """Stores every entries and returns the relevant data.
2093
2094 Arguments:
2095 storage: a Storage object that communicates with the remote object store.
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -05002096 files: list of file paths to upload. If a directory is specified, a
2097 .isolated file is created and its hash is returned.
2098 blacklist: function that returns True if a file should be omitted.
2099 """
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002100 assert all(isinstance(i, unicode) for i in files), files
2101 if len(files) != len(set(map(os.path.abspath, files))):
2102 raise Error('Duplicate entries found.')
2103
2104 results = []
2105 # The temporary directory is only created as needed.
2106 tempdir = None
2107 try:
2108 # TODO(maruel): Yield the files to a worker thread.
2109 items_to_upload = []
2110 for f in files:
2111 try:
2112 filepath = os.path.abspath(f)
2113 if os.path.isdir(filepath):
2114 # Uploading a whole directory.
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002115 items, metadata = directory_to_metadata(
2116 filepath, storage.hash_algo, blacklist)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002117
2118 # Create the .isolated file.
2119 if not tempdir:
2120 tempdir = tempfile.mkdtemp(prefix='isolateserver')
2121 handle, isolated = tempfile.mkstemp(dir=tempdir, suffix='.isolated')
2122 os.close(handle)
2123 data = {
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002124 'algo': SUPPORTED_ALGOS_REVERSE[storage.hash_algo],
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002125 'files': metadata,
Marc-Antoine Ruel1c1edd62013-12-06 09:13:13 -05002126 'version': ISOLATED_FILE_VERSION,
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002127 }
2128 save_isolated(isolated, data)
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002129 h = hash_file(isolated, storage.hash_algo)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002130 items_to_upload.extend(items)
2131 items_to_upload.append(
2132 FileItem(
2133 path=isolated,
2134 digest=h,
2135 size=os.stat(isolated).st_size,
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08002136 high_priority=True))
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002137 results.append((h, f))
2138
2139 elif os.path.isfile(filepath):
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002140 h = hash_file(filepath, storage.hash_algo)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002141 items_to_upload.append(
2142 FileItem(
2143 path=filepath,
2144 digest=h,
2145 size=os.stat(filepath).st_size,
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08002146 high_priority=f.endswith('.isolated')))
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002147 results.append((h, f))
2148 else:
2149 raise Error('%s is neither a file or directory.' % f)
2150 except OSError:
2151 raise Error('Failed to process %s.' % f)
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -05002152 # Technically we would care about which files were uploaded but we don't
2153 # much in practice.
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002154 _uploaded_files = storage.upload_items(items_to_upload)
2155 return results
2156 finally:
2157 if tempdir:
2158 shutil.rmtree(tempdir)
2159
2160
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002161def archive(out, namespace, files, blacklist):
2162 if files == ['-']:
2163 files = sys.stdin.readlines()
2164
2165 if not files:
2166 raise Error('Nothing to upload')
2167
2168 files = [f.decode('utf-8') for f in files]
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002169 blacklist = tools.gen_blacklist(blacklist)
2170 with get_storage(out, namespace) as storage:
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002171 results = archive_files_to_storage(storage, files, blacklist)
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002172 print('\n'.join('%s %s' % (r[0], r[1]) for r in results))
2173
2174
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002175@subcommand.usage('<file1..fileN> or - to read from stdin')
2176def CMDarchive(parser, args):
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002177 """Archives data to the server.
2178
2179 If a directory is specified, a .isolated file is created the whole directory
2180 is uploaded. Then this .isolated file can be included in another one to run
2181 commands.
2182
2183 The commands output each file that was processed with its content hash. For
2184 directories, the .isolated generated for the directory is listed as the
2185 directory entry itself.
2186 """
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002187 add_isolate_server_options(parser, False)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002188 parser.add_option(
2189 '--blacklist',
2190 action='append', default=list(DEFAULT_BLACKLIST),
2191 help='List of regexp to use as blacklist filter when uploading '
2192 'directories')
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +00002193 options, files = parser.parse_args(args)
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002194 process_isolate_server_options(parser, options)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002195 try:
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002196 archive(options.isolate_server, options.namespace, files, options.blacklist)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002197 except Error as e:
2198 parser.error(e.args[0])
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002199 return 0
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002200
2201
def CMDdownload(parser, args):
  """Download data from the server.

  It can either download individual files or a complete tree from a .isolated
  file.
  """
  add_isolate_server_options(parser, True)
  parser.add_option(
      '-i', '--isolated', metavar='HASH',
      help='hash of an isolated file, .isolated file content is discarded, use '
           '--file if you need it')
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default=os.getcwd(),
      help='destination directory')
  options, args = parser.parse_args(args)
  process_isolate_server_options(parser, options)
  if args:
    parser.error('Unsupported arguments: %s' % args)
  # Exactly one of the two modes must be selected.
  if bool(options.isolated) == bool(options.file):
    parser.error('Use one of --isolated or --file, and only one.')

  options.target = os.path.abspath(options.target)

  # The storage abstraction supports both a real isolate server and a local
  # file system hashtable (--indir).
  remote = options.isolate_server or options.indir
  with get_storage(remote, options.namespace) as storage:
    # Fetching individual files.
    if options.file:
      channel = threading_utils.TaskChannel()
      pending = {}
      for digest, dest in options.file:
        pending[digest] = dest
        storage.async_fetch(
            channel,
            WorkerPool.MED,
            digest,
            UNKNOWN_FILE_SIZE,
            functools.partial(file_write, os.path.join(options.target, dest)))
      # Drain the channel until every requested digest has been written out.
      while pending:
        fetched = channel.pull()
        dest = pending.pop(fetched)
        logging.info('%s: %s', fetched, dest)

    # Fetching whole isolated tree.
    if options.isolated:
      settings = fetch_isolated(
          isolated_hash=options.isolated,
          storage=storage,
          cache=MemoryCache(),
          outdir=options.target,
          require_command=False)
      # |rel| is already rooted at options.target; the previous code joined
      # options.target with it a second time, which only worked by accident
      # because os.path.join() discards its first argument when the second
      # one is absolute.
      rel = os.path.join(options.target, settings.relative_cwd)
      print('To run this test please run from the directory %s:' % rel)
      print(' ' + ' '.join(settings.command))

  return 0
2261
2262
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002263@subcommand.usage('<file1..fileN> or - to read from stdin')
2264def CMDhashtable(parser, args):
2265 """Archives data to a hashtable on the file system.
2266
2267 If a directory is specified, a .isolated file is created the whole directory
2268 is uploaded. Then this .isolated file can be included in another one to run
2269 commands.
2270
2271 The commands output each file that was processed with its content hash. For
2272 directories, the .isolated generated for the directory is listed as the
2273 directory entry itself.
2274 """
2275 add_outdir_options(parser)
2276 parser.add_option(
2277 '--blacklist',
2278 action='append', default=list(DEFAULT_BLACKLIST),
2279 help='List of regexp to use as blacklist filter when uploading '
2280 'directories')
2281 options, files = parser.parse_args(args)
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002282 process_outdir_options(parser, options, os.getcwd())
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002283 try:
2284 # Do not compress files when archiving to the file system.
2285 archive(options.outdir, 'default', files, options.blacklist)
2286 except Error as e:
2287 parser.error(e.args[0])
2288 return 0
2289
2290
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002291def add_isolate_server_options(parser, add_indir):
2292 """Adds --isolate-server and --namespace options to parser.
2293
2294 Includes --indir if desired.
2295 """
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002296 parser.add_option(
2297 '-I', '--isolate-server',
2298 metavar='URL', default=os.environ.get('ISOLATE_SERVER', ''),
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002299 help='URL of the Isolate Server to use. Defaults to the environment '
2300 'variable ISOLATE_SERVER if set. No need to specify https://, this '
2301 'is assumed.')
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002302 parser.add_option(
2303 '--namespace', default='default-gzip',
2304 help='The namespace to use on the Isolate Server, default: %default')
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002305 if add_indir:
2306 parser.add_option(
2307 '--indir', metavar='DIR',
2308 help='Directory used to store the hashtable instead of using an '
2309 'isolate server.')
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002310
2311
def process_isolate_server_options(parser, options):
  """Processes the --isolate-server and --indir options and aborts if neither is
  specified.

  Mutates options in place: normalizes options.isolate_server into a clean
  'scheme://netloc' URL, or normalizes options.indir into a native absolute
  path. Calls parser.error() (which exits) on any invalid combination.
  """
  # --indir only exists when add_isolate_server_options() was called with
  # add_indir=True; detect its presence rather than assuming it.
  has_indir = hasattr(options, 'indir')
  if not options.isolate_server:
    if not has_indir:
      parser.error('--isolate-server is required.')
    elif not options.indir:
      parser.error('Use one of --indir or --isolate-server.')
  else:
    if has_indir and options.indir:
      parser.error('Use only one of --indir or --isolate-server.')

  if options.isolate_server:
    # 'https' is the default scheme when none is given on the command line.
    parts = urlparse.urlparse(options.isolate_server, 'https')
    if parts.query:
      parser.error('--isolate-server doesn\'t support query parameter.')
    if parts.fragment:
      parser.error('--isolate-server doesn\'t support fragment in the url.')
    # urlparse('foo.com') will result in netloc='', path='foo.com', which is not
    # what is desired here.
    # Indices into the 6-tuple: [1] is netloc, [2] is path.
    new = list(parts)
    if not new[1] and new[2]:
      # Promote the bare hostname from path to netloc.
      new[1] = new[2].rstrip('/')
      new[2] = ''
    new[2] = new[2].rstrip('/')
    options.isolate_server = urlparse.urlunparse(new)
    return

  if file_path.is_url(options.indir):
    parser.error('Can\'t use an URL for --indir.')
  # Normalize to the OS native separator and an absolute path.
  options.indir = unicode(options.indir).replace('/', os.path.sep)
  options.indir = os.path.abspath(
      os.path.normpath(os.path.join(os.getcwd(), options.indir)))
  if not os.path.isdir(options.indir):
    parser.error('Path given to --indir must exist.')
2349
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002350
2351
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002352def add_outdir_options(parser):
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002353 """Adds --outdir, which is orthogonal to --isolate-server.
2354
2355 Note: On upload, separate commands are used between 'archive' and 'hashtable'.
2356 On 'download', the same command can download from either an isolate server or
2357 a file system.
2358 """
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002359 parser.add_option(
2360 '-o', '--outdir', metavar='DIR',
2361 help='Directory used to recreate the tree.')
2362
2363
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002364def process_outdir_options(parser, options, cwd):
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002365 if not options.outdir:
2366 parser.error('--outdir is required.')
2367 if file_path.is_url(options.outdir):
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002368 parser.error('Can\'t use an URL for --outdir.')
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002369 options.outdir = unicode(options.outdir).replace('/', os.path.sep)
2370 # outdir doesn't need native path case since tracing is never done from there.
2371 options.outdir = os.path.abspath(
2372 os.path.normpath(os.path.join(cwd, options.outdir)))
2373 # In theory, we'd create the directory outdir right away. Defer doing it in
2374 # case there's errors in the command line.
2375
2376
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002377class OptionParserIsolateServer(tools.OptionParserWithLogging):
2378 def __init__(self, **kwargs):
Marc-Antoine Ruelac54cb42013-11-18 14:05:35 -05002379 tools.OptionParserWithLogging.__init__(
2380 self,
2381 version=__version__,
2382 prog=os.path.basename(sys.modules[__name__].__file__),
2383 **kwargs)
Vadim Shtayurae34e13a2014-02-02 11:23:26 -08002384 auth.add_auth_options(self)
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002385
2386 def parse_args(self, *args, **kwargs):
2387 options, args = tools.OptionParserWithLogging.parse_args(
2388 self, *args, **kwargs)
Vadim Shtayura5d1efce2014-02-04 10:55:43 -08002389 auth.process_auth_options(self, options)
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002390 return options, args
2391
2392
def main(args):
  """Dispatches to the requested subcommand; returns the process exit code."""
  try:
    dispatcher = subcommand.CommandDispatcher(__name__)
    return dispatcher.execute(OptionParserIsolateServer(), args)
  except Exception as e:
    # Top-level boundary: report the error and fail instead of tracebacking.
    tools.report_error(e)
    return 1
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00002400
2401
if __name__ == '__main__':
  # Normalize stdin/stdout/argv encoding before any output happens.
  fix_encoding.fix_encoding()
  # Unbuffered output so progress messages appear immediately.
  tools.disable_buffering()
  # Enable ANSI color handling (required for colors on Windows consoles).
  colorama.init()
  sys.exit(main(sys.argv[1:]))