Blame - isolateserver.py - chromium.googlesource.com/infra/luci/client-py

blob: 25bf34a512f12e87bc508696089567b88d1066c4 [file] [log] [blame]

maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	1	#!/usr/bin/env python
Marc-Antoine Ruel	8add124	2013-11-05 17:28:27 -0500	[diff] [blame]	2	# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruel	e98b112	2013-11-05 20:27:57 -0500	[diff] [blame]	3	# Use of this source code is governed under the Apache License, Version 2.0 that
				4	# can be found in the LICENSE file.
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	5
Marc-Antoine Ruel	fcc3cd8	2013-11-19 16:31:38 -0500	[diff] [blame]	6	"""Archives a set of files or directories to a server."""
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	7
Marc-Antoine Ruel	cfb6085	2014-07-02 15:22:00 -0400	[diff] [blame]	8	__version__ = '0.3.4'
maruel@chromium.org	fb78d43	2013-08-28 21:22:40 +0000	[diff] [blame]	9
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	10	import functools
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	11	import json
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	12	import logging
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	13	import os
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	14	import re
Marc-Antoine Ruel	fcc3cd8	2013-11-19 16:31:38 -0500	[diff] [blame]	15	import shutil
				16	import stat
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	17	import sys
Marc-Antoine Ruel	fcc3cd8	2013-11-19 16:31:38 -0500	[diff] [blame]	18	import tempfile
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	19	import threading
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	20	import time
maruel@chromium.org	e82112e	2013-04-24 14:41:55 +0000	[diff] [blame]	21	import urllib
Marc-Antoine Ruel	1687b5e	2014-02-06 17:47:53 -0500	[diff] [blame]	22	import urlparse
csharp@chromium.org	59c7bcf	2012-11-21 21:13:18 +0000	[diff] [blame]	23	import zlib
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	24
maruel@chromium.org	fb78d43	2013-08-28 21:22:40 +0000	[diff] [blame]	25	from third_party import colorama
				26	from third_party.depot_tools import fix_encoding
				27	from third_party.depot_tools import subcommand
				28
Marc-Antoine Ruel	3798993	2013-11-19 16:28:08 -0500	[diff] [blame]	29	from utils import file_path
vadimsh@chromium.org	6b70621	2013-08-28 15:03:46 +0000	[diff] [blame]	30	from utils import net
Marc-Antoine Ruel	cfb6085	2014-07-02 15:22:00 -0400	[diff] [blame]	31	from utils import on_error
vadimsh@chromium.org	b074b16	2013-08-22 17:55:46 +0000	[diff] [blame]	32	from utils import threading_utils
vadimsh@chromium.org	a432647	2013-08-24 02:05:41 +0000	[diff] [blame]	33	from utils import tools
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	34
Vadim Shtayura	e34e13a	2014-02-02 11:23:26 -0800	[diff] [blame]	35	import auth
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame^]	36	import isolated_format
Vadim Shtayura	e34e13a	2014-02-02 11:23:26 -0800	[diff] [blame]	37
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	38
# Version of isolate protocol passed to the server in /handshake request.
ISOLATE_PROTOCOL_VERSION = '1.0'


# The number of files to check the isolate server per /pre-upload query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: the larger the file, the
# larger the possibility it has changed). Then the first
# ITEMS_PER_CONTAINS_QUERIES[0] files are taken and sent to '/pre-upload', then
# the next ITEMS_PER_CONTAINS_QUERIES[1], and so on. The numbers here are a
# trade-off; the more per request, the lower the effect of HTTP round trip
# latency and TCP-level chattiness. On the other hand, larger values cause
# longer lookups, increasing the initial latency to start uploading, which is
# especially an issue for large files. This value is optimized for the "few
# thousands files to look up with minimal number of large files missing" case.
ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]


# A list of already compressed extension types that should not receive any
# compression before being uploaded. Entries are bare extensions, i.e. they do
# not include the leading dot.
ALREADY_COMPRESSED_TYPES = [
  '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'mp4', 'pdf',
  'png', 'wav', 'zip',
]


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# Chunk size (in bytes) to use when reading from a network stream.
NET_IO_FILE_CHUNK = 16 * 1024


# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout, the whole download is aborted.
DOWNLOAD_READ_TIMEOUT = 60

# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30
				88
				89
# Regular expressions (matched against relative paths) of files and directories
# that are skipped by default when archiving.
#
# The alternations must use a bare '|': an escaped pipe inside a raw string
# matches a literal '|' character, which would make these patterns match
# nothing useful.
DEFAULT_BLACKLIST = (
  # Temporary vim or python files.
  r'^.+\.(?:pyc|swp)$',
  # .git or .svn directory, either at the root or below any directory.
  r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
)


# Chromium-specific.
DEFAULT_BLACKLIST += (
  r'^.+\.(?:run_test_cases)$',
  r'^(?:.+' + re.escape(os.path.sep) + r'|)testserver\.log$',
)
				103
				104
class Error(Exception):
  """Base class for generic runtime failures raised by this module."""
				108
				109
class ConfigError(ValueError):
  """Raised when a .isolated file can't be loaded."""
				113
				114
				115	class MappingError(OSError):
				116	"""Failed to recreate the tree."""
				117	pass
				118
				119
def stream_read(stream, chunk_size):
  """Yields successive chunks read from |stream| until it is exhausted.

  Each chunk is whatever stream.read(chunk_size) returns; iteration stops at
  the first empty read.
  """
  chunk = stream.read(chunk_size)
  while chunk:
    yield chunk
    chunk = stream.read(chunk_size)
				127
				128
def file_read(filepath, chunk_size=isolated_format.DISK_FILE_CHUNK, offset=0):
  """Yields the binary content of |filepath| in pieces of |chunk_size| bytes.

  Reading starts at |offset| bytes from the beginning of the file. The file is
  kept open for as long as the generator is being consumed.
  """
  with open(filepath, 'rb') as src:
    if offset:
      src.seek(offset)
    piece = src.read(chunk_size)
    while piece:
      yield piece
      piece = src.read(chunk_size)
				139
				140
def file_write(filepath, content_generator):
  """Writes file content as generated by content_generator.

  Creates the intermediary directory as needed.

  Arguments:
    filepath: path of the file to (over)write, opened in binary mode.
    content_generator: iterable of byte string chunks to write in order.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  filedir = os.path.dirname(filepath)
  # dirname() is '' for a bare file name (file in the current directory);
  # os.makedirs('') would raise, so only create a directory when there is one.
  if filedir and not os.path.isdir(filedir):
    os.makedirs(filedir)
  total = 0
  with open(filepath, 'wb') as f:
    for d in content_generator:
      total += len(d)
      f.write(d)
  return total
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	159
				160
def zip_compress(content_generator, level=7):
  """Yields zlib-compressed chunks for the data read from |content_generator|.

  Empty outputs of the compressor are skipped, so every yielded chunk is
  non-empty. The stream is finalized once the input is exhausted.
  """
  deflater = zlib.compressobj(level)
  for piece in content_generator:
    packed = deflater.compress(piece)
    if not packed:
      continue
    yield packed
  remainder = deflater.flush(zlib.Z_FINISH)
  if remainder:
    yield remainder
				171
				172
def zip_decompress(
    content_generator, chunk_size=isolated_format.DISK_FILE_CHUNK):
  """Reads zipped data from |content_generator| and yields decompressed data.

  Decompresses data in small chunks (no larger than |chunk_size|) so that
  zip bomb file doesn't cause zlib to preallocate huge amount of memory.

  Arguments:
    content_generator: iterable of byte string chunks of a single zlib stream.
    chunk_size: upper bound on the size of each decompressed piece produced
        while input is still being fed.

  Raises IOError if data is corrupted or incomplete. Truncation can only be
  detected when the decompressor object exposes 'eof' (Python 3.3+); on older
  interpreters a truncated stream is accepted, as before.
  """
  decompressor = zlib.decompressobj()
  compressed_size = 0
  try:
    for chunk in content_generator:
      compressed_size += len(chunk)
      data = decompressor.decompress(chunk, chunk_size)
      if data:
        yield data
      # Output was capped at |chunk_size|: keep feeding back the input that
      # zlib has not consumed yet.
      while decompressor.unconsumed_tail:
        data = decompressor.decompress(decompressor.unconsumed_tail, chunk_size)
        if data:
          yield data
    tail = decompressor.flush()
    if tail:
      yield tail
  except zlib.error as e:
    raise IOError(
        'Corrupted zip stream (read %d bytes) - %s' % (compressed_size, e))
  # Ensure all data was read and decompressed.
  if decompressor.unused_data or decompressor.unconsumed_tail:
    raise IOError('Not all data was decompressed')
  # Neither decompress() nor flush() raise when the stream stops before its end
  # marker; 'eof' is the only signal. Default to True where it doesn't exist.
  if not getattr(decompressor, 'eof', True):
    raise IOError(
        'Incomplete zip stream (read %d bytes)' % compressed_size)
				203
				204
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (no compression) for files whose extension is listed in
  ALREADY_COMPRESSED_TYPES, 7 otherwise. The match is case insensitive.
  """
  # os.path.splitext() keeps the leading dot ('.png') while
  # ALREADY_COMPRESSED_TYPES lists bare extensions ('png'); strip the dot,
  # otherwise the lookup never matches.
  file_ext = os.path.splitext(filename)[1].lower().lstrip('.')
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
				210
				211
def create_directories(base_directory, files):
  """Creates under |base_directory| every directory the given files need.

  |files| holds relative file paths; each parent directory, at every depth, is
  created exactly once with os.mkdir(), parents before children.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Gather each file's directory along with all of its ancestors.
  needed = set()
  for f in files:
    parent = os.path.dirname(f)
    while parent:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # Sorting guarantees a directory is created before anything inside it.
  for relative_dir in sorted(needed):
    os.mkdir(os.path.join(base_directory, relative_dir))
				224
				225
def create_symlinks(base_directory, files):
  """Creates the symlinks described by the given (path, properties) pairs.

  Only entries whose properties contain the 'l' key (the link target) are
  processed. On Windows they are skipped with a warning.
  """
  on_windows = sys.platform == 'win32'
  for filepath, properties in files:
    if 'l' in properties:
      if on_windows:
        # TODO(maruel): Create symlink via the win32 api.
        logging.warning('Ignoring symlink %s', filepath)
      else:
        link_path = os.path.join(base_directory, filepath)
        # os.symlink() doesn't exist on Windows.
        os.symlink(properties['l'], link_path)  # pylint: disable=E1101
maruel@chromium.org	af25485	2013-09-17 17:48:14 +0000	[diff] [blame]	238
				239
def is_valid_file(filepath, size):
  """Determines if the given file appears valid.

  Currently it just checks the file's size.

  Arguments:
    filepath: path of the file to check.
    size: expected size in bytes, or UNKNOWN_FILE_SIZE to only check that the
        file exists.

  Returns:
    True if the file looks valid. False if it is missing, can't be stat'ed or
    has an unexpected size.
  """
  if size == UNKNOWN_FILE_SIZE:
    return os.path.isfile(filepath)
  try:
    actual_size = os.stat(filepath).st_size
  except OSError as e:
    # A missing or unreadable file is simply not valid; report it the same way
    # as the unknown-size branch instead of raising.
    logging.warning(
        'Can\'t read item %s, assuming it\'s invalid: %s',
        os.path.basename(filepath), e)
    return False
  if size != actual_size:
    logging.warning(
        'Found invalid item %s; %d != %d',
        os.path.basename(filepath), actual_size, size)
    return False
  return True
				254
				255
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """AutoRetryThreadPool preconfigured to retry tasks failing with IOError.

  The retry count and the worker thread bounds come from the class attributes
  below.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Passed to the base class along with the list of retried exceptions.
  RETRIES = 5

  def __init__(self):
    retried_exceptions = [IOError]
    # NOTE(review): the trailing 0 and 'remote' are passed positionally; their
    # meaning is defined by AutoRetryThreadPool - confirm there.
    super(WorkerPool, self).__init__(
        retried_exceptions, self.RETRIES, self.INITIAL_WORKERS,
        self.MAX_WORKERS, 0, 'remote')
maruel@chromium.org	e45728d	2013-09-16 23:23:22 +0000	[diff] [blame]	273
				274
class Item(object):
  """Something that can be pushed to Storage.

  The digest and size can be supplied up front when known; if either one is
  missing, both get computed from content(). A supplied digest MUST have been
  produced with the hash algorithm used by Storage.

  When used with Storage, Item starts its life in a main thread, travels
  to 'contains' thread, then to 'push' thread and then finally back to
  the main thread. It is never used concurrently from multiple threads.
  """

  def __init__(self, digest=None, size=None, high_priority=False):
    self.high_priority = high_priority
    self.size = size
    self.digest = digest
    # Default zip level; subclasses may override it after construction.
    self.compression_level = 6

  def content(self):
    """Returns an iterable of byte string (str) chunks with the item's data.

    Must be implemented by subclasses.
    """
    raise NotImplementedError()

  def prepare(self, hash_algo):
    """Makes sure both self.digest and self.size are populated.

    When either is missing, content() is read in full and both values are
    recomputed from it. It is a no-op when both are already known.

    Arguments:
      hash_algo: hash algorithm to use to calculate digest.
    """
    if self.digest is not None and self.size is not None:
      return
    hasher = hash_algo()
    byte_count = 0
    for piece in self.content():
      hasher.update(piece)
      byte_count += len(piece)
    self.digest = hasher.hexdigest()
    self.size = byte_count
vadimsh@chromium.org	bcb966b	2013-10-01 18:14:18 +0000	[diff] [blame]	314
				315
				316	class FileItem(Item):
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	317	"""A file to push to Storage.
vadimsh@chromium.org	bcb966b	2013-10-01 18:14:18 +0000	[diff] [blame]	318
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	319	Its digest and size may be provided in advance, if known. Otherwise they will
				320	be derived from the file content.
				321	"""
				322
				323	def __init__(self, path, digest=None, size=None, high_priority=False):
				324	super(FileItem, self).__init__(
				325	digest,
				326	size if size is not None else os.stat(path).st_size,
				327	high_priority)
vadimsh@chromium.org	bcb966b	2013-10-01 18:14:18 +0000	[diff] [blame]	328	self.path = path
				329	self.compression_level = get_zip_compression_level(path)
				330
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	331	def content(self):
				332	return file_read(self.path)
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	333
				334
class BufferItem(Item):
  """An in-memory byte buffer that can be pushed to Storage."""

  def __init__(self, buf, high_priority=False):
    # The size is known immediately; the digest is left for prepare().
    buffer_size = len(buf)
    super(BufferItem, self).__init__(None, buffer_size, high_priority)
    self.buffer = buf

  def content(self):
    """Returns the whole buffer as a single chunk."""
    return [self.buffer]
				344
				345
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	346	class Storage(object):
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	347	"""Efficiently downloads or uploads large set of files via StorageApi.
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	348
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	349	Implements compression support, parallel 'contains' checks, parallel uploads
				350	and more.
				351
				352	Works only within single namespace (and thus hashing algorithm and compression
				353	scheme are fixed).
				354
				355	Spawns multiple internal threads. Thread safe, but not fork safe.
				356	"""
				357
def __init__(self, storage_api):
  """Wraps |storage_api|; thread pools are created lazily on first use."""
  namespace = storage_api.namespace
  self._storage_api = storage_api
  # Both settings are derived from the namespace.
  self._use_zip = is_namespace_with_compression(namespace)
  self._hash_algo = isolated_format.get_hash_algo(namespace)
  self._net_thread_pool = None
  self._cpu_thread_pool = None
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	364
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	365	@property
Vadim Shtayura	e0ab190	2014-04-29 10:55:27 -0700	[diff] [blame]	366	def hash_algo(self):
				367	"""Hashing algorithm used to name files in storage based on their content.
				368
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame^]	369	Defined by \|namespace\|. See also isolated_format.get_hash_algo().
Vadim Shtayura	e0ab190	2014-04-29 10:55:27 -0700	[diff] [blame]	370	"""
				371	return self._hash_algo
				372
				373	@property
				374	def location(self):
				375	"""Location of a backing store that this class is using.
				376
				377	Exact meaning depends on the storage_api type. For IsolateServer it is
				378	an URL of isolate server, for FileSystem is it a path in file system.
				379	"""
				380	return self._storage_api.location
				381
				382	@property
				383	def namespace(self):
				384	"""Isolate namespace used by this storage.
				385
				386	Indirectly defines hashing scheme and compression method used.
				387	"""
				388	return self._storage_api.namespace
				389
				390	@property
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	391	def cpu_thread_pool(self):
				392	"""ThreadPool for CPU-bound tasks like zipping."""
				393	if self._cpu_thread_pool is None:
				394	self._cpu_thread_pool = threading_utils.ThreadPool(
				395	2, max(threading_utils.num_processors(), 2), 0, 'zip')
				396	return self._cpu_thread_pool
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	397
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	398	@property
				399	def net_thread_pool(self):
				400	"""AutoRetryThreadPool for IO-bound tasks, retries IOError."""
				401	if self._net_thread_pool is None:
				402	self._net_thread_pool = WorkerPool()
				403	return self._net_thread_pool
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	404
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	405	def close(self):
				406	"""Waits for all pending tasks to finish."""
				407	if self._cpu_thread_pool:
				408	self._cpu_thread_pool.join()
				409	self._cpu_thread_pool.close()
				410	self._cpu_thread_pool = None
				411	if self._net_thread_pool:
				412	self._net_thread_pool.join()
				413	self._net_thread_pool.close()
				414	self._net_thread_pool = None
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	415
vadimsh@chromium.org	7cdf1c0	2013-09-25 00:24:16 +0000	[diff] [blame]	416	def __enter__(self):
				417	"""Context manager interface."""
				418	return self
				419
				420	def __exit__(self, _exc_type, _exc_value, _traceback):
				421	"""Context manager interface."""
				422	self.close()
				423	return False
				424
def upload_items(self, items):
  """Uploads a bunch of items to the isolate server.

  It figures out what items are missing from the server and uploads only them.

  Arguments:
    items: list of Item instances that represents data to upload. May be
        empty, in which case nothing is uploaded.

  Returns:
    List of items that were uploaded. All other items are already there.
  """
  # TODO(vadimsh): Optimize special case of len(items) == 1 that is frequently
  # used by swarming.py. There's no need to spawn multiple threads and try to
  # do stuff in parallel: there's nothing to parallelize. 'contains' check and
  # 'push' should be performed sequentially in the context of current thread.

  def percent(part, whole):
    """Returns |part| as a percentage of |whole|, 0 when |whole| is 0."""
    return part * 100. / whole if whole else 0

  # Ensure all digests are calculated.
  for item in items:
    item.prepare(self._hash_algo)

  # For each digest keep only first Item that matches it. All other items
  # are just indistinguishable copies from the point of view of isolate
  # server (it doesn't care about paths at all, only content and digests).
  seen = {}
  duplicates = 0
  for item in items:
    if seen.setdefault(item.digest, item) is not item:
      duplicates += 1
  items = list(seen.values())
  if duplicates:
    logging.info('Skipped %d duplicated files', duplicates)

  # Enqueue all upload tasks.
  missing = set()
  uploaded = []
  channel = threading_utils.TaskChannel()
  for missing_item, push_state in self.get_missing_items(items):
    missing.add(missing_item)
    self.async_push(channel, missing_item, push_state)

  # No need to spawn deadlock detector thread if there's nothing to upload.
  if missing:
    with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
      # Wait for all started uploads to finish.
      while len(uploaded) != len(missing):
        detector.ping()
        item = channel.pull()
        uploaded.append(item)
        logging.debug(
            'Uploaded %d / %d: %s', len(uploaded), len(missing), item.digest)
  logging.info('All files are uploaded')

  # Print stats. Both |total| and |total_size| can be 0 (no items, or only
  # empty ones), so every ratio goes through percent().
  total = len(items)
  total_size = sum(f.size for f in items)
  logging.info(
      'Total: %6d, %9.1fkb',
      total,
      total_size / 1024.)
  cache_hit = set(items) - missing
  cache_hit_size = sum(f.size for f in cache_hit)
  logging.info(
      'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
      len(cache_hit),
      cache_hit_size / 1024.,
      percent(len(cache_hit), total),
      percent(cache_hit_size, total_size))
  cache_miss = missing
  cache_miss_size = sum(f.size for f in cache_miss)
  logging.info(
      'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
      len(cache_miss),
      cache_miss_size / 1024.,
      percent(len(cache_miss), total),
      percent(cache_miss_size, total_size))

  return uploaded
				502
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	503	def get_fetch_url(self, item):
				504	"""Returns an URL that can be used to fetch given item once it's uploaded.
				505
				506	Note that if namespace uses compression, data at given URL is compressed.
vadimsh@chromium.org	f24e5c3	2013-10-11 21:16:21 +0000	[diff] [blame]	507
				508	Arguments:
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	509	item: Item to get fetch URL for.
vadimsh@chromium.org	f24e5c3	2013-10-11 21:16:21 +0000	[diff] [blame]	510
				511	Returns:
				512	An URL or None if underlying protocol doesn't support this.
				513	"""
Vadim Shtayura	e0ab190	2014-04-29 10:55:27 -0700	[diff] [blame]	514	item.prepare(self._hash_algo)
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	515	return self._storage_api.get_fetch_url(item.digest)
vadimsh@chromium.org	f24e5c3	2013-10-11 21:16:21 +0000	[diff] [blame]	516
def async_push(self, channel, item, push_state):
  """Starts asynchronous push to the server in a parallel thread.

  Can be used only after |item| was checked for presence on a server with
  'get_missing_items' call. 'get_missing_items' returns |push_state| object
  that contains storage specific information describing how to upload
  the item (for example in case of cloud storage, it is signed upload URLs).

  When the namespace uses compression, the item is first zipped as a task on
  the CPU thread pool and the push task is only queued on the network thread
  pool once the whole content has been compressed in memory. Otherwise the
  push task is queued on the network thread pool right away.

  Arguments:
    channel: TaskChannel that receives back |item| when upload ends.
    item: item to upload as instance of Item class.
    push_state: push state returned by 'get_missing_items' call for |item|.

  Returns:
    None, but |channel| later receives back |item| when upload ends. If
    zipping fails, the exception is sent to |channel| instead.
  """
  # Thread pool task priority.
  # NOTE(review): HIGH and MED are not defined on WorkerPool itself;
  # presumably they are inherited from its thread pool base class - confirm.
  priority = WorkerPool.HIGH if item.high_priority else WorkerPool.MED

  def push(content):
    """Pushes an Item and returns it to |channel|."""
    # Does nothing if the digest and size are already known.
    item.prepare(self._hash_algo)
    self._storage_api.push(item, push_state, content)
    return item

  # If zipping is not required, just start a push task.
  if not self._use_zip:
    self.net_thread_pool.add_task_with_channel(
        channel, priority, push, item.content())
    return

  # If zipping is enabled, zip in a separate thread.
  def zip_and_push():
    # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
    # content right here. It will block until all file is zipped.
    try:
      stream = zip_compress(item.content(), item.compression_level)
      data = ''.join(stream)
    except Exception as exc:
      # The failure is logged and forwarded to |channel|; no push task is
      # queued for this item.
      logging.error('Failed to zip \'%s\': %s', item, exc)
      channel.send_exception()
      return
    # The compressed content is handed to push() as a single chunk.
    self.net_thread_pool.add_task_with_channel(
        channel, priority, push, [data])
  self.cpu_thread_pool.add_task(priority, zip_and_push)
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	562
def push(self, item, push_state):
  """Uploads one item to the server and blocks until the upload is done.

  This is a blocking wrapper around 'async_push'. To upload many items,
  prefer 'upload_items' or 'async_push' with a shared TaskChannel.

  Arguments:
    item: item to upload as instance of Item class.
    push_state: push state returned by 'get_missing_items' call for |item|.

  Returns:
    Pushed item (same object as |item|).
  """
  done_channel = threading_utils.TaskChannel()
  # The whole round trip (schedule + wait) runs under the deadlock detector.
  with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT):
    self.async_push(done_channel, item, push_state)
    uploaded = done_channel.pull()
    # 'async_push' hands the very same Item object back through the channel.
    assert uploaded is item
  return item
				582
def async_fetch(self, channel, priority, digest, size, sink):
  """Schedules a download of |digest| on the network thread pool.

  Arguments:
    channel: TaskChannel that receives back |digest| when download ends.
    priority: thread pool task priority for the fetch.
    digest: hex digest of an item to download.
    size: expected size of the item (after decompression).
    sink: function that will be called as sink(generator).
  """
  def fetch():
    try:
      # Build the reading pipeline: raw bytes -> (optional) unzip -> verifier.
      raw = self._storage_api.fetch(digest)
      data = (
          zip_decompress(raw, isolated_format.DISK_FILE_CHUNK)
          if self._use_zip else raw)
      # The verifier checks the stream size; its output is what |sink| gets.
      sink(FetchStreamVerifier(data, size).run())
    except Exception as err:
      # Log here for context, then let the thread pool see the failure.
      logging.error('Failed to fetch %s: %s', digest, err)
      raise
    return digest

  # Decompression is not offloaded to a separate CPU pool: it is really fast
  # and most probably IO bound anyway.
  self.net_thread_pool.add_task_with_channel(channel, priority, fetch)
				611
def get_missing_items(self, items):
  """Yields items that the server does not have yet.

  Runs several StorageApi 'contains' queries in parallel, one per batch
  produced by 'batch_items_for_check'.

  Arguments:
    items: a list of Item objects to check.

  Yields:
    For each missing item it yields a pair (item, push_state), where:
      * item - Item object that is missing (one of |items|).
      * push_state - opaque object that contains storage specific information
          describing how to upload the item (for example in case of cloud
          storage, it is signed upload URLs). It can later be passed to
          'async_push'.
  """
  replies = threading_utils.TaskChannel()

  # Digests and sizes must be known before the server can be asked about them.
  for entry in items:
    entry.prepare(self._hash_algo)

  # Fire one 'contains' query per batch, counting how many are in flight.
  queries = 0
  for batch in batch_items_for_check(items):
    self.net_thread_pool.add_task_with_channel(
        replies, WorkerPool.HIGH, self._storage_api.contains, batch)
    queries += 1

  # Relay every reply as soon as it arrives, in completion order.
  while queries:
    queries -= 1
    missing = replies.pull()
    for pair in missing.iteritems():
      yield pair
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	645
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	646
def batch_items_for_check(items):
  """Groups items into batches, one batch per 'exists?' query to the server.

  Items are handed out largest first. The size limit of the N-th batch is
  taken from ITEMS_PER_CONTAINS_QUERIES[N]; once that sequence runs out, its
  last entry is reused for every following batch. Each batch is meant to be
  passed to a single call of StorageApi's 'contains' method.

  Arguments:
    items: a list of Item objects.

  Yields:
    Batches of items to query for existence in a single operation,
    each batch is a list of Item objects.
  """
  limits = ITEMS_PER_CONTAINS_QUERIES
  last_index = len(limits) - 1
  limit = limits[0]
  emitted = 0
  current = []
  for item in sorted(items, key=lambda x: x.size, reverse=True):
    current.append(item)
    if len(current) != limit:
      continue
    # Batch is full: hand it out and look up the limit for the next one.
    yield current
    current = []
    emitted += 1
    limit = limits[min(emitted, last_index)]
  # Whatever is left over forms the final, partially filled batch.
  if current:
    yield current
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	673
				674
class FetchQueue(object):
  """Schedules downloads from Storage and lands the results in LocalCache.

  Keeps track of several fetch operations running at once. It is the bridge
  between Storage and LocalCache, so that the two do not depend on each other
  at all.
  """

  def __init__(self, storage, cache):
    self.storage = storage
    self.cache = cache
    # Completed fetches report their digest through this channel.
    self._channel = threading_utils.TaskChannel()
    # Digests with a fetch currently in flight.
    self._pending = set()
    # Digests requested via add(); checked by verify_all_cached().
    self._accessed = set()
    # Digests considered present in the cache, seeded from the cache itself.
    self._fetched = cache.cached_set()

  def add(self, digest, size=UNKNOWN_FILE_SIZE, priority=WorkerPool.MED):
    """Starts asynchronous fetch of item |digest|."""
    if digest in self._pending:
      # A fetch for this digest is already in flight.
      return

    # Mark this file as in use, verify_all_cached will later ensure it is still
    # in cache.
    self._accessed.add(digest)

    if digest in self._fetched:
      # Already fetched: let the cache update the item's LRU position.
      # 'touch' returns True if item is in cache and not corrupted.
      still_valid = self.cache.touch(digest, size)
      if still_valid:
        return
      # Item is corrupted, remove it from cache and fetch it again.
      self._fetched.remove(digest)
      self.cache.evict(digest)

    # TODO(maruel): It should look at the free disk space, the current cache
    # size and the size of the new item on every new item:
    # - Trim the cache as more entries are listed when free disk space is low,
    #   otherwise if the amount of data downloaded during the run > free disk
    #   space, it'll crash.
    # - Make sure there's enough free disk space to fit all dependencies of
    #   this run! If not, abort early.

    # Start fetching; downloaded chunks are written to the cache under
    # |digest|.
    self._pending.add(digest)
    write_to_cache = functools.partial(self.cache.write, digest)
    self.storage.async_fetch(
        self._channel, priority, digest, size, write_to_cache)

  def wait(self, digests):
    """Blocks until at least one of |digests| is retrieved.

    Returns the first digest retrieved.
    """
    # Anything already fetched can be returned right away.
    for candidate in digests:
      if candidate in self._fetched:
        return candidate

    # Ensure all requested items are being fetched now.
    assert all(d in self._pending for d in digests), (digests, self._pending)

    # Drain completed fetches until one of the requested digests shows up.
    while True:
      if not self._pending:
        # Should never reach this point due to assert above.
        raise RuntimeError('Impossible state')
      finished = self._channel.pull()
      self._pending.remove(finished)
      self._fetched.add(finished)
      if finished in digests:
        return finished

  def inject_local_file(self, path, algo):
    """Adds local file to the cache as if it was fetched from storage."""
    with open(path, 'rb') as src:
      content = src.read()
    hexdigest = algo(content).hexdigest()
    self.cache.write(hexdigest, [content])
    self._fetched.add(hexdigest)
    return hexdigest

  @property
  def pending_count(self):
    """Returns number of items to be fetched."""
    return len(self._pending)

  def verify_all_cached(self):
    """True if all accessed items are in cache."""
    cached_now = self.cache.cached_set()
    return self._accessed.issubset(cached_now)
				766
				767
				768	class FetchStreamVerifier(object):
				769	"""Verifies that fetched file is valid before passing it to the LocalCache."""
				770
				771	def __init__(self, stream, expected_size):
				772	self.stream = stream
				773	self.expected_size = expected_size
				774	self.current_size = 0
				775
				776	def run(self):
				777	"""Generator that yields same items as \|stream\|.
				778
				779	Verifies \|stream\| is complete before yielding a last chunk to consumer.
				780
				781	Also wraps IOError produced by consumer into MappingError exceptions since
				782	otherwise Storage will retry fetch on unrelated local cache errors.
				783	"""
				784	# Read one chunk ahead, keep it in \|stored\|.
				785	# That way a complete stream can be verified before pushing last chunk
				786	# to consumer.
				787	stored = None
				788	for chunk in self.stream:
				789	assert chunk is not None
				790	if stored is not None:
				791	self._inspect_chunk(stored, is_last=False)
				792	try:
				793	yield stored
				794	except IOError as exc:
				795	raise MappingError('Failed to store an item in cache: %s' % exc)
				796	stored = chunk
				797	if stored is not None:
				798	self._inspect_chunk(stored, is_last=True)
				799	try:
				800	yield stored
				801	except IOError as exc:
				802	raise MappingError('Failed to store an item in cache: %s' % exc)
				803
				804	def _inspect_chunk(self, chunk, is_last):
				805	"""Called for each fetched chunk before passing it to consumer."""
				806	self.current_size += len(chunk)
				807	if (is_last and (self.expected_size != UNKNOWN_FILE_SIZE) and
				808	(self.expected_size != self.current_size)):
				809	raise IOError('Incorrect file size: expected %d, got %d' % (
				810	self.expected_size, self.current_size))
				811
				812
class StorageApi(object):
  """Interface for classes that implement low-level storage operations.

  StorageApi is oblivious of compression and hashing scheme used. These
  details are handled in higher level Storage class.

  Clients should generally not use StorageApi directly. Storage class is
  preferred since it implements compression and upload optimizations.

  Every member below raises NotImplementedError; concrete storage backends
  are expected to override them.
  """

  @property
  def location(self):
    """Location of a backing store that this class is using.

    Exact meaning depends on the type. For IsolateServer it is a URL of the
    isolate server, for FileSystem it is a path in the file system.
    """
    raise NotImplementedError()

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    raise NotImplementedError()

  def get_fetch_url(self, digest):
    """Returns a URL that can be used to fetch an item with given digest.

    Arguments:
      digest: hex digest of item to fetch.

    Returns:
      A URL or None if the protocol doesn't support this.
    """
    raise NotImplementedError()

  def fetch(self, digest, offset=0):
    """Fetches an object and yields its content.

    Arguments:
      digest: hash digest of item to download.
      offset: offset (in bytes) from the start of the file to resume fetch from.

    Yields:
      Chunks of downloaded item (as str objects).
    """
    raise NotImplementedError()

  def push(self, item, push_state, content=None):
    """Uploads an |item| with content generated by |content| generator.

    |item| MUST go through 'contains' call to get |push_state| before it can
    be pushed to the storage.

    To be clear, here is one possible usage:
      all_items = [... all items to push as Item subclasses ...]
      for missing_item, push_state in storage_api.contains(all_items).items():
        storage_api.push(missing_item, push_state)

    When pushing to a namespace with compression, data that should be pushed
    and data provided by the item is not the same. In that case |content| is
    not None and it yields chunks of compressed data (using item.content() as
    a source of original uncompressed data). This is implemented by Storage
    class.

    Arguments:
      item: Item object that holds information about an item being pushed.
      push_state: push state object as returned by 'contains' call.
      content: a generator that yields chunks to push, item.content() if None.

    Returns:
      None.
    """
    raise NotImplementedError()

  def contains(self, items):
    """Checks for |items| on the server, prepares missing ones for upload.

    Arguments:
      items: list of Item objects to check for presence.

    Returns:
      A dict missing Item -> opaque push state object to be passed to 'push'.
      See doc string for 'push'.
    """
    raise NotImplementedError()
				901
				902
class _IsolateServerPushState(object):
  """Upload bookkeeping for one item, handed from 'contains' to 'push'.

  Carries the two URLs involved in an upload plus flags recording how far
  the upload has progressed.

  Note this needs to be a global class to support pickling.
  """

  def __init__(self, upload_url, finalize_url):
    self.upload_url, self.finalize_url = upload_url, finalize_url
    # Nothing has been sent or confirmed yet for a freshly created state.
    self.uploaded = self.finalized = False
				914
				915
class IsolateServer(StorageApi):
  """StorageApi implementation that downloads and uploads to Isolate Server.

  It uploads and downloads directly from Google Storage whenever appropriate.
  Works only within single namespace.
  """

  def __init__(self, base_url, namespace):
    """Arguments:
      base_url: URL of the isolate server, must start with 'http'. Trailing
          slashes are stripped.
      namespace: isolate namespace all requests are issued for.
    """
    super(IsolateServer, self).__init__()
    assert base_url.startswith('http'), base_url
    self._base_url = base_url.rstrip('/')
    self._namespace = namespace
    # Guards the lazy handshake in |_server_capabilities|.
    self._lock = threading.Lock()
    # Cached handshake response; None until the first successful handshake.
    self._server_caps = None

  @staticmethod
  def _generate_handshake_request():
    """Returns a dict to be sent as handshake request body."""
    # TODO(vadimsh): Set 'pusher' and 'fetcher' according to intended usage.
    return {
      'client_app_version': __version__,
      'fetcher': True,
      'protocol_version': ISOLATE_PROTOCOL_VERSION,
      'pusher': True,
    }

  @staticmethod
  def _validate_handshake_response(caps):
    """Validates and normalizes handshake response.

    Arguments:
      caps: dict decoded from the handshake response body.

    Returns:
      |caps| itself when it is acceptable.

    Raises:
      MappingError if the server reported an error.
      KeyError if a mandatory key is missing.
      ValueError if 'access_token' is empty.
    """
    # Look at the server-reported error first: an error response may lack the
    # version keys read below, and the resulting KeyError would hide the
    # server's own message from the user.
    if caps.get('error'):
      raise MappingError(caps['error'])
    logging.info('Protocol version: %s', caps['protocol_version'])
    logging.info('Server version: %s', caps['server_app_version'])
    if not caps['access_token']:
      raise ValueError('access_token is missing')
    return caps

  @property
  def _server_capabilities(self):
    """Performs handshake with the server if not yet done.

    The response is cached, so the handshake request is sent only until it
    succeeds once.

    Returns:
      Server capabilities dictionary as returned by /handshake endpoint.

    Raises:
      MappingError if server rejects the handshake.
    """
    # TODO(maruel): Make this request much earlier asynchronously while the
    # files are being enumerated.

    # TODO(vadimsh): Put |namespace| in the URL so that server can apply
    # namespace-level ACLs to this call.
    with self._lock:
      if self._server_caps is None:
        try:
          caps = net.url_read_json(
              url=self._base_url + '/content-gs/handshake',
              data=self._generate_handshake_request())
          if caps is None:
            raise MappingError('Failed to perform handshake.')
          if not isinstance(caps, dict):
            raise ValueError('Expecting JSON dict')
          self._server_caps = self._validate_handshake_response(caps)
        except (ValueError, KeyError, TypeError) as exc:
          # KeyError exception has very confusing str conversion: it's just a
          # missing key value and nothing else. So print exception class name
          # as well.
          raise MappingError('Invalid handshake response (%s): %s' % (
              exc.__class__.__name__, exc))
      return self._server_caps

  @property
  def location(self):
    """Base URL of the isolate server (without trailing slashes)."""
    return self._base_url

  @property
  def namespace(self):
    """Namespace this instance was created for."""
    return self._namespace

  def get_fetch_url(self, digest):
    """Returns the /content-gs/retrieve URL for |digest| in this namespace."""
    assert isinstance(digest, basestring)
    return '%s/content-gs/retrieve/%s/%s' % (
        self._base_url, self._namespace, digest)

  def fetch(self, digest, offset=0):
    """Opens a download of |digest| and returns a generator of its chunks.

    Arguments:
      digest: hex digest of the item to download.
      offset: byte offset to resume from; when non-zero a Range request is
          sent and the Content-Range of the reply is validated.

    Returns:
      Result of stream_read() over the opened connection.

    Raises:
      IOError if the request fails or the server does not honor |offset|.
    """
    source_url = self.get_fetch_url(digest)
    logging.debug('download_file(%s, %d)', source_url, offset)

    connection = net.url_open(
        source_url,
        read_timeout=DOWNLOAD_READ_TIMEOUT,
        headers={'Range': 'bytes=%d-' % offset} if offset else None)

    if not connection:
      raise IOError('Request failed - %s' % source_url)

    # If |offset| is used, verify server respects it by checking Content-Range.
    if offset:
      content_range = connection.get_header('Content-Range')
      if not content_range:
        raise IOError('Missing Content-Range header')

      # 'Content-Range' format is 'bytes <offset>-<last_byte_index>/<size>'.
      # According to a spec, <size> can be '*' meaning "Total size of the file
      # is not known in advance".
      try:
        match = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', content_range)
        if not match:
          raise ValueError()
        content_offset = int(match.group(1))
        last_byte_index = int(match.group(2))
        size = None if match.group(3) == '*' else int(match.group(3))
      except ValueError:
        raise IOError('Invalid Content-Range header: %s' % content_range)

      # Ensure returned offset equals requested one.
      if offset != content_offset:
        raise IOError('Expecting offset %d, got %d (Content-Range is %s)' % (
            offset, content_offset, content_range))

      # Ensure entire tail of the file is returned.
      if size is not None and last_byte_index + 1 != size:
        raise IOError('Incomplete response. Content-Range: %s' % content_range)

    return stream_read(connection, NET_IO_FILE_CHUNK)

  def push(self, item, push_state, content=None):
    """Uploads |item| to |push_state.upload_url| and finalizes the upload.

    Arguments:
      item: Item being pushed; its digest and size must already be set.
      push_state: _IsolateServerPushState produced by 'contains' for |item|.
          Its 'uploaded' and 'finalized' flags are updated as steps succeed,
          so a retry after a failed finalization skips the upload.
      content: str or iterable of str chunks to upload, item.content() if
          None.

    Raises:
      IOError if the upload or the finalization request fails.
    """
    assert isinstance(item, Item)
    assert item.digest is not None
    assert item.size is not None
    assert isinstance(push_state, _IsolateServerPushState)
    assert not push_state.finalized

    # Default to item.content().
    content = item.content() if content is None else content

    # Do not iterate byte by byte over 'str'. Push it all as a single chunk.
    if isinstance(content, basestring):
      assert not isinstance(content, unicode), 'Unicode string is not allowed'
      content = [content]

    # TODO(vadimsh): Do not read from |content| generator when retrying push.
    # If |content| is indeed a generator, it can not be re-winded back
    # to the beginning of the stream. A retry will find it exhausted. A possible
    # solution is to wrap |content| generator with some sort of caching
    # restartable generator. It should be done alongside streaming support
    # implementation.

    # This push operation may be a retry after failed finalization call below,
    # no need to reupload contents in that case.
    if not push_state.uploaded:
      # A cheezy way to avoid memcpy of (possibly huge) file, until streaming
      # upload support is implemented.
      if isinstance(content, list) and len(content) == 1:
        content = content[0]
      else:
        content = ''.join(content)
      # PUT file to |upload_url|.
      response = net.url_read(
          url=push_state.upload_url,
          data=content,
          content_type='application/octet-stream',
          method='PUT')
      if response is None:
        raise IOError('Failed to upload a file %s to %s' % (
            item.digest, push_state.upload_url))
      push_state.uploaded = True
    else:
      logging.info(
          'A file %s already uploaded, retrying finalization only', item.digest)

    # Optionally notify the server that it's done.
    if push_state.finalize_url:
      # TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
      # send it to isolated server. That way isolate server can verify that
      # the data safely reached Google Storage (GS provides MD5 and CRC32C of
      # stored files).
      # TODO(maruel): Fix the server to properly accept data={} so
      # url_read_json() can be used.
      response = net.url_read(
          url=push_state.finalize_url,
          data='',
          content_type='application/json',
          method='POST')
      if response is None:
        raise IOError('Failed to finalize an upload of %s' % item.digest)
    push_state.finalized = True

  def contains(self, items):
    """Asks the server which of |items| are missing.

    Arguments:
      items: list of Item objects with digest and size already calculated.

    Returns:
      A dict {missing Item: _IsolateServerPushState} to be used with 'push'.

    Raises:
      MappingError if the query fails or the response is malformed.
    """
    logging.info('Checking existence of %d files...', len(items))

    # Ensure all items were initialized with 'prepare' call. Storage does that.
    assert all(i.digest is not None and i.size is not None for i in items)

    # Request body is a json encoded list of dicts.
    body = [
        {
          'h': item.digest,
          's': item.size,
          'i': int(item.high_priority),
        } for item in items
    ]

    query_url = '%s/content-gs/pre-upload/%s?token=%s' % (
        self._base_url,
        self._namespace,
        urllib.quote(self._server_capabilities['access_token']))

    # Response body is a list of push_urls (or null if file is already present).
    response = None
    try:
      response = net.url_read_json(url=query_url, data=body)
      if response is None:
        raise MappingError('Failed to execute /pre-upload query')
      if not isinstance(response, list):
        raise ValueError('Expecting response with json-encoded list')
      if len(response) != len(items):
        raise ValueError(
            'Incorrect number of items in the list, expected %d, '
            'but got %d' % (len(items), len(response)))
    except ValueError as err:
      raise MappingError(
          'Invalid response from server: %s, body is %s' % (err, response))

    # Pick Items that are missing, attach _PushState to them. The response
    # has one entry per queried item, in the same order (length checked above).
    missing_items = {}
    for item, push_urls in zip(items, response):
      if push_urls:
        assert len(push_urls) == 2, str(push_urls)
        missing_items[item] = _IsolateServerPushState(
            push_urls[0], push_urls[1])
    logging.info('Queried %d files, %d cache hit',
        len(items), len(items) - len(missing_items))
    return missing_items
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	1150
				1151
class FileSystem(StorageApi):
  """StorageApi backend that keeps every item as a file under a directory.

  Typical setup is an NFS/CIFS share mounted locally, from which files are
  fetched onto a local partition.
  """

  # Sentinel returned by 'contains' as the push state. 'push' only accepts
  # this exact object, so callers have to go through 'contains' first; handing
  # None to 'push' does not work.
  _DUMMY_PUSH_STATE = object()

  def __init__(self, base_path, namespace):
    super(FileSystem, self).__init__()
    self._base_path = base_path
    self._namespace = namespace

  @property
  def location(self):
    """Base directory the items are stored in."""
    return self._base_path

  @property
  def namespace(self):
    """Namespace given at construction time."""
    return self._namespace

  def get_fetch_url(self, digest):
    """There is no url for items of this backend, always None."""
    return None

  def fetch(self, digest, offset=0):
    """Reads the file named |digest| in the base directory via file_read()."""
    assert isinstance(digest, basestring)
    source = os.path.join(self._base_path, digest)
    return file_read(source, offset=offset)

  def push(self, item, push_state, content=None):
    """Writes |content| (item.content() when None) to the file for |item|."""
    assert isinstance(item, Item)
    assert item.digest is not None
    assert item.size is not None
    assert push_state is self._DUMMY_PUSH_STATE
    if content is None:
      content = item.content()
    if isinstance(content, basestring):
      assert not isinstance(content, unicode), 'Unicode string is not allowed'
      # file_write() is handed a list of chunks.
      content = [content]
    destination = os.path.join(self._base_path, item.digest)
    file_write(destination, content)

  def contains(self, items):
    """Returns {item: push state} for items with no file in the directory."""
    assert all(i.digest is not None and i.size is not None for i in items)
    missing = {}
    for item in items:
      if not os.path.exists(os.path.join(self._base_path, item.digest)):
        missing[item] = self._DUMMY_PUSH_STATE
    return missing
vadimsh@chromium.org	35122be	2013-09-19 02:48:00 +0000	[diff] [blame]	1200
				1201
class LocalCache(object):
  """Interface of a local cache holding objects fetched via Storage.

  Several threads may use the same cache at once, so implementations are
  expected to guard their internal state with a lock.
  """
  # Class level default; stays None unless something overrides it.
  cache_dir = None

  def __enter__(self):
    """Context manager interface: the cache itself is the managed object."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface: never swallows an exception."""
    return False

  def cached_set(self):
    """Returns a set with the digests of all cached items.

    A new set object is returned on every call.
    """
    raise NotImplementedError()

  def touch(self, digest, size):
    """Checks that an item is not corrupted and refreshes its LRU position.

    Arguments:
      digest: hash digest of the item to check.
      size: size the item is expected to have.

    Returns:
      True if the item is in the cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Drops the item from the cache, if present."""
    raise NotImplementedError()

  def read(self, digest):
    """Returns the whole content of a cached item as one str."""
    raise NotImplementedError()

  def write(self, digest, content):
    """Consumes the |content| generator and stores the data in the cache."""
    raise NotImplementedError()

  def hardlink(self, digest, dest, file_mode):
    """Makes the file at |dest| have the same content as cached |digest|.

    When file_mode is provided, it is used to set the executable bit if
    applicable.
    """
    raise NotImplementedError()
				1253
				1254
				1255	class MemoryCache(LocalCache):
				1256	"""LocalCache implementation that stores everything in memory."""
				1257
Vadim Shtayura	e3fbd10	2014-04-29 17:05:21 -0700	[diff] [blame]	1258	def __init__(self, file_mode_mask=0500):
				1259	"""Args:
				1260	file_mode_mask: bit mask to AND file mode with. Default value will make
				1261	all mapped files to be read only.
				1262	"""
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1263	super(MemoryCache, self).__init__()
Vadim Shtayura	e3fbd10	2014-04-29 17:05:21 -0700	[diff] [blame]	1264	self._file_mode_mask = file_mode_mask
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1265	# Let's not assume dict is thread safe.
				1266	self._lock = threading.Lock()
				1267	self._contents = {}
				1268
				1269	def cached_set(self):
				1270	with self._lock:
				1271	return set(self._contents)
				1272
				1273	def touch(self, digest, size):
				1274	with self._lock:
				1275	return digest in self._contents
				1276
				1277	def evict(self, digest):
				1278	with self._lock:
				1279	self._contents.pop(digest, None)
				1280
				1281	def read(self, digest):
				1282	with self._lock:
				1283	return self._contents[digest]
				1284
				1285	def write(self, digest, content):
				1286	# Assemble whole stream before taking the lock.
				1287	data = ''.join(content)
				1288	with self._lock:
				1289	self._contents[digest] = data
				1290
Marc-Antoine Ruel	fb199cf	2013-11-12 15:38:12 -0500	[diff] [blame]	1291	def hardlink(self, digest, dest, file_mode):
				1292	"""Since data is kept in memory, there is no filenode to hardlink."""
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1293	file_write(dest, [self.read(digest)])
Marc-Antoine Ruel	fb199cf	2013-11-12 15:38:12 -0500	[diff] [blame]	1294	if file_mode is not None:
Vadim Shtayura	e3fbd10	2014-04-29 17:05:21 -0700	[diff] [blame]	1295	os.chmod(dest, file_mode & self._file_mode_mask)
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1296
				1297
def is_namespace_with_compression(namespace):
  """Tells whether objects stored in |namespace| are compressed.

  That is the case when the namespace name ends with '-gzip' or '-deflate'.
  """
  compressed_suffixes = ('-gzip', '-deflate')
  return namespace.endswith(compressed_suffixes)
				1301
				1302
def get_storage_api(file_or_url, namespace):
  """Creates the low-level StorageApi object for a single |namespace|.

  Storage is built on top of it. Clients should rarely need it directly,
  'get_storage' is the better entry point.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of isolate
        service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    IsolateServer instance when |file_or_url| is a URL according to
    file_path.is_url(), FileSystem instance otherwise.
  """
  api_class = IsolateServer if file_path.is_url(file_or_url) else FileSystem
  return api_class(file_or_url, namespace)
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	1324
				1325
def get_storage(file_or_url, namespace):
  """Builds a Storage object able to upload to and download from |namespace|.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of isolate
        service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Storage instance wrapping the StorageApi picked by get_storage_api().
  """
  storage_api = get_storage_api(file_or_url, namespace)
  return Storage(storage_api)
maruel@chromium.org	dedbf49	2013-09-12 20:42:11 +0000	[diff] [blame]	1340
maruel@chromium.org	dedbf49	2013-09-12 20:42:11 +0000	[diff] [blame]	1341
def expand_symlinks(indir, relfile):
  """Follows symlinks in |relfile|, but treating symlinks that point outside the
  build tree as if they were ordinary directories/files. Returns the final
  symlink-free target and a list of paths to symlinks encountered in the
  process.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory we could support
  that case.

  Arguments:
    indir: directory |relfile| is relative to, i.e. the build tree root.
    relfile: path relative to |indir|. A trailing os.path.sep is remembered and
        put back on the returned path.

  Returns:
    Tuple (relfile, symlinks): the resolved path relative to |indir| and the
    list of followed symlinks, each one relative to |indir|.

  Raises:
    MappingError: a symlink target doesn't exist, or a symlink is located
        under its own target.
  """
  # bool * str below gives either os.path.sep or '', so the trailing separator
  # is restored only when the input had one.
  is_directory = relfile.endswith(os.path.sep)
  # |done| is the part of the path that is already processed, |todo| is what
  # remains to be looked at.
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    # NOTE(review): presumably splits |todo| around its first symlink
    # component, with |symlink| empty when there is none; confirm against
    # file_path.split_at_symlink().
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
        done, todo)
    if not symlink:
      # No symlink left: only fix the path case of the remainder and stop.
      todo = file_path.fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break
    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    # Directory containing the symlink; a relative link value is resolved
    # against it.
    target = os.path.normpath(os.path.join(done, pre_symlink))
    symlink_target = os.readlink(symlink_path)
    if os.path.isabs(symlink_target):
      # Absolute paths are considered normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = file_path.fix_native_path_case(target, symlink_target)

    if not os.path.exists(target):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not file_path.path_starts_with(indir, target):
      # The target is outside |indir|: keep the symlink in the path as if it
      # was an ordinary entry and carry on with what follows it.
      done = symlink_path
      todo = post_symlink
      continue
    if file_path.path_starts_with(target, symlink_path):
      # The symlink lives under its own target.
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))
    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    # From here on |target| and |symlink_path| are lists of path components.
    target = target.split(os.path.sep)
    symlink_path = symlink_path.split(os.path.sep)
    prefix_length = 0
    for target_piece, symlink_path_piece in zip(target, symlink_path):
      if target_piece == symlink_path_piece:
        prefix_length += 1
      else:
        break
    done = os.path.sep.join(target[:prefix_length])
    todo = os.path.join(
        os.path.sep.join(target[prefix_length:]), post_symlink)

  relfile = os.path.relpath(done, indir)
  relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
  return relfile, symlinks
				1412
				1413
				1414	def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
				1415	"""Expands a single input. It can result in multiple outputs.
				1416
				1417	This function is recursive when relfile is a directory.
				1418
				1419	Note: this code doesn't properly handle recursive symlink like one created
				1420	with:
				1421	ln -s .. foo
				1422	"""
				1423	if os.path.isabs(relfile):
				1424	raise MappingError('Can\'t map absolute path %s' % relfile)
				1425
				1426	infile = file_path.normpath(os.path.join(indir, relfile))
				1427	if not infile.startswith(indir):
				1428	raise MappingError('Can\'t map file %s outside %s' % (infile, indir))
				1429
				1430	filepath = os.path.join(indir, relfile)
				1431	native_filepath = file_path.get_native_path_case(filepath)
				1432	if filepath != native_filepath:
				1433	# Special case './'.
				1434	if filepath != native_filepath + '.' + os.path.sep:
Marc-Antoine Ruel	582e224	2014-06-26 15:22:06 -0400	[diff] [blame]	1435	# While it'd be nice to enforce path casing on Windows, it's impractical.
				1436	# Also give up enforcing strict path case on OSX. Really, it's that sad.
				1437	# The case where it happens is very specific and hard to reproduce:
Marc-Antoine Ruel	fcc3cd8	2013-11-19 16:31:38 -0500	[diff] [blame]	1438	# get_native_path_case(
				1439	# u'Foo.framework/Versions/A/Resources/Something.nib') will return
				1440	# u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
				1441	#
				1442	# Note that this is really something deep in OSX because running
				1443	# ls Foo.framework/Versions/A
				1444	# will print out 'Resources', while file_path.get_native_path_case()
				1445	# returns a lower case 'r'.
				1446	#
				1447	# So something is happening under the hood resulting in the command 'ls'
				1448	# and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree. We
				1449	# have no idea why.
Marc-Antoine Ruel	582e224	2014-06-26 15:22:06 -0400	[diff] [blame]	1450	if sys.platform not in ('darwin', 'win32'):
Marc-Antoine Ruel	fcc3cd8	2013-11-19 16:31:38 -0500	[diff] [blame]	1451	raise MappingError(
				1452	'File path doesn\'t equal native file path\n%s != %s' %
				1453	(filepath, native_filepath))
				1454
				1455	symlinks = []
				1456	if follow_symlinks:
				1457	relfile, symlinks = expand_symlinks(indir, relfile)
				1458
				1459	if relfile.endswith(os.path.sep):
				1460	if not os.path.isdir(infile):
				1461	raise MappingError(
				1462	'%s is not a directory but ends with "%s"' % (infile, os.path.sep))
				1463
				1464	# Special case './'.
				1465	if relfile.startswith('.' + os.path.sep):
				1466	relfile = relfile[2:]
				1467	outfiles = symlinks
				1468	try:
				1469	for filename in os.listdir(infile):
				1470	inner_relfile = os.path.join(relfile, filename)
				1471	if blacklist and blacklist(inner_relfile):
				1472	continue
				1473	if os.path.isdir(os.path.join(indir, inner_relfile)):
				1474	inner_relfile += os.path.sep
				1475	outfiles.extend(
				1476	expand_directory_and_symlink(indir, inner_relfile, blacklist,
				1477	follow_symlinks))
				1478	return outfiles
				1479	except OSError as e:
				1480	raise MappingError(
				1481	'Unable to iterate over directory %s.\n%s' % (infile, e))
				1482	else:
				1483	# Always add individual files even if they were blacklisted.
				1484	if os.path.isdir(infile):
				1485	raise MappingError(
				1486	'Input directory %s must have a trailing slash' % infile)
				1487
				1488	if not os.path.isfile(infile):
				1489	raise MappingError('Input file %s doesn\'t exist' % infile)
				1490
				1491	return symlinks + [relfile]
				1492
				1493
def process_input(filepath, prevdict, read_only, algo):
  """Processes an input file, a dependency, and return meta data about it.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calculate the hash of the file's
    content if the path points to a file and not a symlink.

  Arguments:
    filepath: File to act on.
    prevdict: the previous dictionary. It is used to retrieve the cached hash
              or link destination to skip recalculating it. Optional: None is
              treated as an empty dictionary.
    read_only: If true, the user write bit is also removed from the saved
               file mode. Write access for group and all access for 'others'
               are always removed, and the group executable bit is aligned on
               the user one. On windows, mode is not set since all files are
               'executable' by default.
    algo:      Hashing algorithm used.

  Returns:
    The necessary data to create a entry in the 'files' section of an .isolated
    file: 't' (timestamp) is always set, 'm' (mode, not on windows), 's' (size)
    and 'h' (hash) are set for a file, 'l' (link destination) for a symlink.

  Raises:
    MappingError: |filepath| cannot be stat'ed.
  """
  # The docstring documents prevdict as optional, so accept None too.
  if prevdict is None:
    prevdict = {}
  out = {}
  # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
  # if prevdict.get('T') == True:
  #   # The file's content is ignored. Skip the time and hard code mode.
  #   out['s'] = 0
  #   out['h'] = algo().hexdigest()
  #   out['T'] = True
  #   return out

  # Always check the file stat and check if it is a link. The timestamp is used
  # to know if the file's content/symlink destination should be looked into.
  # E.g. only reuse from prevdict if the timestamp hasn't changed.
  # There is the risk of the file's timestamp being reset to its last value
  # manually while its content changed. We don't protect against that use case.
  try:
    filestats = os.lstat(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(filestats.st_mode)

  if sys.platform != 'win32':
    # Ignore file mode on Windows since it's not really useful there.
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      filemode &= ~stat.S_IWUSR
    # Group executable bit follows the user executable bit.
    if filemode & stat.S_IXUSR:
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    if not is_link:
      out['m'] = filemode

  # Used to skip recalculating the hash or link destination. Use the most recent
  # update time.
  # TODO(maruel): Save it in the .state file instead of .isolated so the
  # .isolated file is deterministic.
  out['t'] = int(round(filestats.st_mtime))

  if not is_link:
    out['s'] = filestats.st_size
    # If the timestamp wasn't updated and the file size is still the same, carry
    # on the hash.
    if (prevdict.get('t') == out['t'] and
        prevdict.get('s') == out['s']):
      # Reuse the previous hash if available.
      out['h'] = prevdict.get('h')
    if not out.get('h'):
      out['h'] = isolated_format.hash_file(filepath, algo)
  else:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      # Reuse the previous link destination if available.
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do again
      # here.
      symlink_value = os.readlink(filepath)  # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = file_path.fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  return out
				1583
				1584
				1585	def save_isolated(isolated, data):
				1586	"""Writes one or multiple .isolated files.
				1587
				1588	Note: this reference implementation does not create child .isolated file so it
				1589	always returns an empty list.
				1590
				1591	Returns the list of child isolated files that are included by \|isolated\|.
				1592	"""
				1593	# Make sure the data is valid .isolated data by 'reloading' it.
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame^]	1594	algo = isolated_format.SUPPORTED_ALGOS[data['algo']]
Marc-Antoine Ruel	0519946	2014-03-13 15:40:48 -0400	[diff] [blame]	1595	load_isolated(json.dumps(data), algo)
Marc-Antoine Ruel	fcc3cd8	2013-11-19 16:31:38 -0500	[diff] [blame]	1596	tools.write_json(isolated, data, True)
				1597	return []
				1598
				1599
def upload_tree(base_url, indir, infiles, namespace):
  """Uploads the files of a tree through the storage found at |base_url|.

  Arguments:
    base_url:  location of the storage, handed to get_storage().
    indir:     Root directory the infiles are based in.
    infiles:   dict mapping file paths relative to |indir| to their metadata
               ('h' digest, 's' size, optional 'priority', 'l' for symlinks).
    namespace: The namespace to use on the server.

  Returns:
    0, always.
  """
  logging.info('upload_tree(indir=%s, files=%d)', indir, len(infiles))

  # Build one FileItem per regular file. Entries carrying 'l' are symlinks;
  # they are left out since they are not represented by items on isolate
  # server side.
  items = []
  for filepath, metadata in infiles.iteritems():
    if 'l' in metadata:
      continue
    items.append(
        FileItem(
            path=os.path.join(indir, filepath),
            digest=metadata['h'],
            size=metadata['s'],
            high_priority=metadata.get('priority') == '0'))

  with get_storage(base_url, namespace) as storage:
    storage.upload_items(items)
  return 0
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	1629
				1630
Marc-Antoine Ruel	0519946	2014-03-13 15:40:48 -0400	[diff] [blame]	1631	def load_isolated(content, algo):
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1632	"""Verifies the .isolated file is valid and loads this object with the json
				1633	data.
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1634
				1635	Arguments:
				1636	- content: raw serialized content to load.
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1637	- algo: hashlib algorithm class. Used to confirm the algorithm matches the
				1638	algorithm used on the Isolate Server.
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1639	"""
				1640	try:
				1641	data = json.loads(content)
				1642	except ValueError:
				1643	raise ConfigError('Failed to parse: %s...' % content[:100])
				1644
				1645	if not isinstance(data, dict):
				1646	raise ConfigError('Expected dict, got %r' % data)
				1647
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1648	# Check 'version' first, since it could modify the parsing after.
Marc-Antoine Ruel	0519946	2014-03-13 15:40:48 -0400	[diff] [blame]	1649	value = data.get('version', '1.0')
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1650	if not isinstance(value, basestring):
				1651	raise ConfigError('Expected string, got %r' % value)
Marc-Antoine Ruel	0519946	2014-03-13 15:40:48 -0400	[diff] [blame]	1652	try:
				1653	version = tuple(map(int, value.split('.')))
				1654	except ValueError:
				1655	raise ConfigError('Expected valid version, got %r' % value)
				1656
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame^]	1657	expected_version = tuple(
				1658	map(int, isolated_format.ISOLATED_FILE_VERSION.split('.')))
Marc-Antoine Ruel	0519946	2014-03-13 15:40:48 -0400	[diff] [blame]	1659	# Major version must match.
				1660	if version[0] != expected_version[0]:
Marc-Antoine Ruel	1c1edd6	2013-12-06 09:13:13 -0500	[diff] [blame]	1661	raise ConfigError(
				1662	'Expected compatible \'%s\' version, got %r' %
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame^]	1663	(isolated_format.ISOLATED_FILE_VERSION, value))
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1664
				1665	if algo is None:
Marc-Antoine Ruel	ac54cb4	2013-11-18 14:05:35 -0500	[diff] [blame]	1666	# TODO(maruel): Remove the default around Jan 2014.
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1667	# Default the algorithm used in the .isolated file itself, falls back to
				1668	# 'sha-1' if unspecified.
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame^]	1669	algo = isolated_format.SUPPORTED_ALGOS_REVERSE[data.get('algo', 'sha-1')]
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1670
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1671	for key, value in data.iteritems():
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1672	if key == 'algo':
				1673	if not isinstance(value, basestring):
				1674	raise ConfigError('Expected string, got %r' % value)
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame^]	1675	if value not in isolated_format.SUPPORTED_ALGOS:
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1676	raise ConfigError(
				1677	'Expected one of \'%s\', got %r' %
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame^]	1678	(', '.join(sorted(isolated_format.SUPPORTED_ALGOS)), value))
				1679	if value != isolated_format.SUPPORTED_ALGOS_REVERSE[algo]:
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1680	raise ConfigError(
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame^]	1681	'Expected \'%s\', got %r' %
				1682	(isolated_format.SUPPORTED_ALGOS_REVERSE[algo], value))
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1683
				1684	elif key == 'command':
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1685	if not isinstance(value, list):
				1686	raise ConfigError('Expected list, got %r' % value)
				1687	if not value:
				1688	raise ConfigError('Expected non-empty command')
				1689	for subvalue in value:
				1690	if not isinstance(subvalue, basestring):
				1691	raise ConfigError('Expected string, got %r' % subvalue)
				1692
				1693	elif key == 'files':
				1694	if not isinstance(value, dict):
				1695	raise ConfigError('Expected dict, got %r' % value)
				1696	for subkey, subvalue in value.iteritems():
				1697	if not isinstance(subkey, basestring):
				1698	raise ConfigError('Expected string, got %r' % subkey)
				1699	if not isinstance(subvalue, dict):
				1700	raise ConfigError('Expected dict, got %r' % subvalue)
				1701	for subsubkey, subsubvalue in subvalue.iteritems():
				1702	if subsubkey == 'l':
				1703	if not isinstance(subsubvalue, basestring):
				1704	raise ConfigError('Expected string, got %r' % subsubvalue)
				1705	elif subsubkey == 'm':
				1706	if not isinstance(subsubvalue, int):
				1707	raise ConfigError('Expected int, got %r' % subsubvalue)
				1708	elif subsubkey == 'h':
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame^]	1709	if not isolated_format.is_valid_hash(subsubvalue, algo):
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1710	raise ConfigError('Expected sha-1, got %r' % subsubvalue)
				1711	elif subsubkey == 's':
Marc-Antoine Ruel	aab3a62	2013-11-28 09:47:05 -0500	[diff] [blame]	1712	if not isinstance(subsubvalue, (int, long)):
				1713	raise ConfigError('Expected int or long, got %r' % subsubvalue)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1714	else:
				1715	raise ConfigError('Unknown subsubkey %s' % subsubkey)
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	1716	if bool('h' in subvalue) == bool('l' in subvalue):
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1717	raise ConfigError(
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	1718	'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
				1719	subvalue)
				1720	if bool('h' in subvalue) != bool('s' in subvalue):
				1721	raise ConfigError(
				1722	'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
				1723	subvalue)
				1724	if bool('s' in subvalue) == bool('l' in subvalue):
				1725	raise ConfigError(
				1726	'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
				1727	subvalue)
				1728	if bool('l' in subvalue) and bool('m' in subvalue):
				1729	raise ConfigError(
				1730	'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1731	subvalue)
				1732
				1733	elif key == 'includes':
				1734	if not isinstance(value, list):
				1735	raise ConfigError('Expected list, got %r' % value)
				1736	if not value:
				1737	raise ConfigError('Expected non-empty includes list')
				1738	for subvalue in value:
Marc-Antoine Ruel	8bee66d	2014-08-28 19:02:07 -0400	[diff] [blame^]	1739	if not isolated_format.is_valid_hash(subvalue, algo):
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1740	raise ConfigError('Expected sha-1, got %r' % subvalue)
				1741
Marc-Antoine Ruel	0519946	2014-03-13 15:40:48 -0400	[diff] [blame]	1742	elif key == 'os':
				1743	if version >= (1, 4):
				1744	raise ConfigError('Key \'os\' is not allowed starting version 1.4')
				1745
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1746	elif key == 'read_only':
Marc-Antoine Ruel	7124e39	2014-01-09 11:49:21 -0500	[diff] [blame]	1747	if not value in (0, 1, 2):
				1748	raise ConfigError('Expected 0, 1 or 2, got %r' % value)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1749
				1750	elif key == 'relative_cwd':
				1751	if not isinstance(value, basestring):
				1752	raise ConfigError('Expected string, got %r' % value)
				1753
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1754	elif key == 'version':
				1755	# Already checked above.
				1756	pass
				1757
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1758	else:
maruel@chromium.org	385d73d	2013-09-19 18:33:21 +0000	[diff] [blame]	1759	raise ConfigError('Unknown key %r' % key)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1760
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	1761	# Automatically fix os.path.sep if necessary. While .isolated files are always
				1762	# in the the native path format, someone could want to download an .isolated
				1763	# tree from another OS.
				1764	wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
				1765	if 'files' in data:
				1766	data['files'] = dict(
				1767	(k.replace(wrong_path_sep, os.path.sep), v)
				1768	for k, v in data['files'].iteritems())
				1769	for v in data['files'].itervalues():
				1770	if 'l' in v:
				1771	v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
				1772	if 'relative_cwd' in data:
				1773	data['relative_cwd'] = data['relative_cwd'].replace(
				1774	wrong_path_sep, os.path.sep)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1775	return data
				1776
				1777
				1778	class IsolatedFile(object):
				1779	"""Represents a single parsed .isolated file."""
				1780	def __init__(self, obj_hash, algo):
				1781	"""\|obj_hash\| is really the sha-1 of the file."""
				1782	logging.debug('IsolatedFile(%s)' % obj_hash)
				1783	self.obj_hash = obj_hash
				1784	self.algo = algo
				1785	# Set once all the left-side of the tree is parsed. 'Tree' here means the
				1786	# .isolate and all the .isolated files recursively included by it with
				1787	# 'includes' key. The order of each sha-1 in 'includes', each representing a
				1788	# .isolated file in the hash table, is important, as the later ones are not
				1789	# processed until the firsts are retrieved and read.
				1790	self.can_fetch = False
				1791
				1792	# Raw data.
				1793	self.data = {}
				1794	# A IsolatedFile instance, one per object in self.includes.
				1795	self.children = []
				1796
				1797	# Set once the .isolated file is loaded.
				1798	self._is_parsed = False
				1799	# Set once the files are fetched.
				1800	self.files_fetched = False
				1801
Marc-Antoine Ruel	0519946	2014-03-13 15:40:48 -0400	[diff] [blame]	1802	def load(self, content):
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1803	"""Verifies the .isolated file is valid and loads this object with the json
				1804	data.
				1805	"""
				1806	logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
				1807	assert not self._is_parsed
Marc-Antoine Ruel	0519946	2014-03-13 15:40:48 -0400	[diff] [blame]	1808	self.data = load_isolated(content, self.algo)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1809	self.children = [
				1810	IsolatedFile(i, self.algo) for i in self.data.get('includes', [])
				1811	]
				1812	self._is_parsed = True
				1813
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1814	def fetch_files(self, fetch_queue, files):
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1815	"""Adds files in this .isolated file not present in \|files\| dictionary.
				1816
				1817	Preemptively request files.
				1818
				1819	Note that \|files\| is modified by this function.
				1820	"""
				1821	assert self.can_fetch
				1822	if not self._is_parsed or self.files_fetched:
				1823	return
				1824	logging.debug('fetch_files(%s)' % self.obj_hash)
				1825	for filepath, properties in self.data.get('files', {}).iteritems():
				1826	# Root isolated has priority on the files being mapped. In particular,
				1827	# overriden files must not be fetched.
				1828	if filepath not in files:
				1829	files[filepath] = properties
				1830	if 'h' in properties:
				1831	# Preemptively request files.
				1832	logging.debug('fetching %s' % filepath)
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	1833	fetch_queue.add(properties['h'], properties['s'], WorkerPool.MED)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1834	self.files_fetched = True
				1835
				1836
				1837	class Settings(object):
				1838	"""Results of a completely parsed .isolated file."""
				1839	def __init__(self):
				1840	self.command = []
				1841	self.files = {}
				1842	self.read_only = None
				1843	self.relative_cwd = None
				1844	# The main .isolated file, a IsolatedFile instance.
				1845	self.root = None
				1846
Marc-Antoine Ruel	0519946	2014-03-13 15:40:48 -0400	[diff] [blame]	1847	def load(self, fetch_queue, root_isolated_hash, algo):
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1848	"""Loads the .isolated and all the included .isolated asynchronously.
				1849
				1850	It enables support for "included" .isolated files. They are processed in
				1851	strict order but fetched asynchronously from the cache. This is important so
				1852	that a file in an included .isolated file that is overridden by an embedding
				1853	.isolated file is not fetched needlessly. The includes are fetched in one
				1854	pass and the files are fetched as soon as all the ones on the left-side
				1855	of the tree were fetched.
				1856
				1857	The prioritization is very important here for nested .isolated files.
				1858	'includes' have the highest priority and the algorithm is optimized for both
				1859	deep and wide trees. A deep one is a long link of .isolated files referenced
				1860	one at a time by one item in 'includes'. A wide one has a large number of
				1861	'includes' in a single .isolated file. 'left' is defined as an included
				1862	.isolated file earlier in the 'includes' list. So the order of the elements
				1863	in 'includes' is important.
				1864	"""
				1865	self.root = IsolatedFile(root_isolated_hash, algo)
				1866
				1867	# Isolated files being retrieved now: hash -> IsolatedFile instance.
				1868	pending = {}
				1869	# Set of hashes of already retrieved items to refuse recursive includes.
				1870	seen = set()
				1871
				1872	def retrieve(isolated_file):
				1873	h = isolated_file.obj_hash
				1874	if h in seen:
				1875	raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
				1876	assert h not in pending
				1877	seen.add(h)
				1878	pending[h] = isolated_file
Vadim Shtayura	bcff74f	2014-02-27 16:19:34 -0800	[diff] [blame]	1879	fetch_queue.add(h, priority=WorkerPool.HIGH)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1880
				1881	retrieve(self.root)
				1882
				1883	while pending:
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1884	item_hash = fetch_queue.wait(pending)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1885	item = pending.pop(item_hash)
Marc-Antoine Ruel	0519946	2014-03-13 15:40:48 -0400	[diff] [blame]	1886	item.load(fetch_queue.cache.read(item_hash))
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1887	if item_hash == root_isolated_hash:
				1888	# It's the root item.
				1889	item.can_fetch = True
				1890
				1891	for new_child in item.children:
				1892	retrieve(new_child)
				1893
				1894	# Traverse the whole tree to see if files can now be fetched.
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1895	self._traverse_tree(fetch_queue, self.root)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1896
				1897	def check(n):
				1898	return all(check(x) for x in n.children) and n.files_fetched
				1899	assert check(self.root)
				1900
				1901	self.relative_cwd = self.relative_cwd or ''
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1902
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1903	def _traverse_tree(self, fetch_queue, node):
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1904	if node.can_fetch:
				1905	if not node.files_fetched:
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1906	self._update_self(fetch_queue, node)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1907	will_break = False
				1908	for i in node.children:
				1909	if not i.can_fetch:
				1910	if will_break:
				1911	break
				1912	# Automatically mark the first one as fetcheable.
				1913	i.can_fetch = True
				1914	will_break = True
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1915	self._traverse_tree(fetch_queue, i)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1916
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	1917	def _update_self(self, fetch_queue, node):
				1918	node.fetch_files(fetch_queue, self.files)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1919	# Grabs properties.
				1920	if not self.command and node.data.get('command'):
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	1921	# Ensure paths are correctly separated on windows.
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1922	self.command = node.data['command']
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	1923	if self.command:
				1924	self.command[0] = self.command[0].replace('/', os.path.sep)
				1925	self.command = tools.fix_python_path(self.command)
maruel@chromium.org	4160164	2013-09-18 19:40:46 +0000	[diff] [blame]	1926	if self.read_only is None and node.data.get('read_only') is not None:
				1927	self.read_only = node.data['read_only']
				1928	if (self.relative_cwd is None and
				1929	node.data.get('relative_cwd') is not None):
				1930	self.relative_cwd = node.data['relative_cwd']
				1931
				1932
def fetch_isolated(isolated_hash, storage, cache, outdir, require_command):
  """Fetches a whole .isolated tree and maps its files into |outdir|.

  The .isolated file(s) are retrieved first, then every file they reference is
  waited for and linked from |cache| into the output directory.

  Arguments:
    isolated_hash: hash of the root *.isolated file. A value that is not a valid
                   hash is assumed to be a local file and injected in the cache.
    storage: Storage class that communicates with isolate storage.
    cache: LocalCache class that knows how to store and map files locally.
    outdir: Output directory to map file tree to.
    require_command: Ensure *.isolated specifies a command to run.

  Returns:
    Settings object that holds details about loaded *.isolated file.

  Raises:
    MappingError: injecting |isolated_hash| as a local file failed with IOError,
                  or the cache does not hold all the requested files at the end.
    ConfigError: |require_command| is set and no command was found.
  """
  logging.debug(
      'fetch_isolated(%s, %s, %s, %s, %s)',
      isolated_hash, storage, cache, outdir, require_command)
  # Hash algorithm to use, defined by namespace |storage| is using.
  algo = storage.hash_algo
  with cache:
    fetch_queue = FetchQueue(storage, cache)
    settings = Settings()

    with tools.Profiler('GetIsolateds'):
      # Optionally support local files by manually adding them to cache.
      is_hash = isolated_format.is_valid_hash(isolated_hash, algo)
      if not is_hash:
        logging.debug('%s is not a valid hash, assuming a file', isolated_hash)
        try:
          isolated_hash = fetch_queue.inject_local_file(isolated_hash, algo)
        except IOError:
          raise MappingError(
              '%s doesn\'t seem to be a valid file. Did you intent to pass a '
              'valid hash?' % isolated_hash)

      # Load all *.isolated and start loading rest of the files.
      settings.load(fetch_queue, isolated_hash, algo)
      if require_command and not settings.command:
        # TODO(vadimsh): All fetch operations are already enqueue and there's no
        # easy way to cancel them.
        raise ConfigError('No command to run')

    with tools.Profiler('GetRest'):
      # Create file system hierarchy.
      if not os.path.isdir(outdir):
        os.makedirs(outdir)
      create_directories(outdir, settings.files)
      create_symlinks(outdir, settings.files.iteritems())

      # Ensure working directory exists.
      working_dir = os.path.normpath(
          os.path.join(outdir, settings.relative_cwd))
      if not os.path.isdir(working_dir):
        os.makedirs(working_dir)

      # Multimap: digest -> list of pairs (path, props). Only entries with a
      # hash ('h') have content to wait for.
      by_digest = {}
      for relpath, meta in settings.files.iteritems():
        if 'h' not in meta:
          continue
        if meta['h'] not in by_digest:
          by_digest[meta['h']] = []
        by_digest[meta['h']].append((relpath, meta))

      # Now block on the remaining files to be downloaded and mapped.
      logging.info(
          'Retrieving remaining files (%d of them)...',
          fetch_queue.pending_count)
      last_update = time.time()
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as watchdog:
        while by_digest:
          watchdog.ping()

          # Wait for any item to finish fetching to cache.
          ready = fetch_queue.wait(by_digest)

          # Link corresponding files to a fetched item in cache.
          for relpath, meta in by_digest.pop(ready):
            destination = os.path.join(outdir, relpath)
            cache.hardlink(ready, destination, meta.get('m'))

          # Report progress, at most once per update period.
          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            msg = '%d files remaining...' % len(by_digest)
            print(msg)
            logging.info(msg)
            last_update = time.time()

    # Cache could evict some items we just tried to fetch, it's a fatal error.
    if not fetch_queue.verify_all_cached():
      raise MappingError('Cache is too small to hold all requested files')
  return settings
				2019
				2020
def directory_to_metadata(root, algo, blacklist):
  """Walks |root| and describes its content for upload.

  Returns:
    Tuple (items, metadata). |metadata| maps each relative path to its
    properties, with the 't' entry removed. |items| holds one FileItem per
    entry that has a hash ('h'); paths ending in '.isolated' are flagged as
    high priority.
  """
  root = file_path.get_native_path_case(root)
  not_windows = sys.platform != 'win32'
  metadata = {}
  for relpath in expand_directory_and_symlink(
      root, '.' + os.path.sep, blacklist, not_windows):
    props = process_input(os.path.join(root, relpath), {}, False, algo)
    # The 't' entry is not kept in the returned metadata.
    props.pop('t')
    metadata[relpath] = props

  items = []
  for relpath, meta in metadata.iteritems():
    # Entries without a hash have no content to upload.
    if 'h' not in meta:
      continue
    items.append(
        FileItem(
            path=os.path.join(root, relpath),
            digest=meta['h'],
            size=meta['s'],
            high_priority=relpath.endswith('.isolated')))
  return items, metadata
				2041
				2042
def archive_files_to_storage(storage, files, blacklist):
  """Stores every entries and returns the relevant data.

  Arguments:
    storage: a Storage object that communicates with the remote object store.
    files: list of file paths to upload. If a directory is specified, a
           .isolated file is created and its hash is returned.
    blacklist: function that returns True if a file should be omitted.

  Returns:
    List of (hash, path) tuples, one per entry of |files|, in the same order.
    For a directory, the hash is the one of the generated .isolated file.

  Raises:
    Error: |files| has duplicate entries, an entry is neither a file nor a
           directory, or processing an entry failed with OSError or IOError.
  """
  assert all(isinstance(i, unicode) for i in files), files
  if len(files) != len(set(map(os.path.abspath, files))):
    raise Error('Duplicate entries found.')

  results = []
  # The temporary directory is only created as needed.
  tempdir = None
  try:
    # TODO(maruel): Yield the files to a worker thread.
    items_to_upload = []
    for f in files:
      try:
        filepath = os.path.abspath(f)
        if os.path.isdir(filepath):
          # Uploading a whole directory.
          items, metadata = directory_to_metadata(
              filepath, storage.hash_algo, blacklist)

          # Create the .isolated file.
          if not tempdir:
            tempdir = tempfile.mkdtemp(prefix='isolateserver')
          handle, isolated = tempfile.mkstemp(dir=tempdir, suffix='.isolated')
          os.close(handle)
          data = {
              'algo':
                  isolated_format.SUPPORTED_ALGOS_REVERSE[storage.hash_algo],
              'files': metadata,
              'version': isolated_format.ISOLATED_FILE_VERSION,
          }
          save_isolated(isolated, data)
          h = isolated_format.hash_file(isolated, storage.hash_algo)
          items_to_upload.extend(items)
          items_to_upload.append(
              FileItem(
                  path=isolated,
                  digest=h,
                  size=os.stat(isolated).st_size,
                  high_priority=True))
          results.append((h, f))

        elif os.path.isfile(filepath):
          h = isolated_format.hash_file(filepath, storage.hash_algo)
          items_to_upload.append(
              FileItem(
                  path=filepath,
                  digest=h,
                  size=os.stat(filepath).st_size,
                  high_priority=f.endswith('.isolated')))
          results.append((h, f))
        else:
          raise Error('%s is neither a file or directory.' % f)
      except (IOError, OSError):
        # On Python 2, IOError (raised when opening or reading a file fails) is
        # not a subclass of OSError, so both are caught to report a consistent
        # Error to the caller.
        # NOTE(review): assumes the hashing/saving helpers surface read
        # failures as IOError -- confirm against their implementation.
        raise Error('Failed to process %s.' % f)
    # Technically we would care about which files were uploaded but we don't
    # much in practice, so the return value is discarded.
    storage.upload_items(items_to_upload)
    return results
  finally:
    # Remove the generated .isolated files, if any were created.
    if tempdir:
      shutil.rmtree(tempdir)
				2112
				2113
def archive(out, namespace, files, blacklist):
  """Archives |files| to the storage designated by |out| and |namespace|.

  Prints one '<hash> <path>' line per processed entry.

  Arguments:
    out: location of the storage, passed to get_storage().
    namespace: namespace to use, passed to get_storage().
    files: list of utf-8 encoded paths to archive, or ['-'] to read the list
           from stdin, one path per line.
    blacklist: list of regexps, converted with tools.gen_blacklist().

  Raises:
    Error: there is nothing to upload.
  """
  if files == ['-']:
    # readlines() keeps the line terminators; strip them, otherwise no path
    # read from stdin would ever match an existing file. Blank lines are
    # skipped, as an empty path would otherwise resolve to the current
    # directory.
    files = [l.rstrip('\r\n') for l in sys.stdin.readlines()]
    files = [f for f in files if f]

  if not files:
    raise Error('Nothing to upload')

  files = [f.decode('utf-8') for f in files]
  blacklist = tools.gen_blacklist(blacklist)
  with get_storage(out, namespace) as storage:
    results = archive_files_to_storage(storage, files, blacklist)
  print('\n'.join('%s %s' % (r[0], r[1]) for r in results))
				2126
				2127
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  # Register the server flags (without --indir) plus the blacklist filter.
  add_isolate_server_options(parser, False)
  blacklist_help = (
      'List of regexp to use as blacklist filter when uploading '
      'directories')
  parser.add_option(
      '--blacklist',
      action='append',
      default=list(DEFAULT_BLACKLIST),
      help=blacklist_help)

  options, files = parser.parse_args(args)
  process_isolate_server_options(parser, options)

  server = options.isolate_server
  # Authentication is only attempted when the server is an URL.
  if file_path.is_url(server):
    auth.ensure_logged_in(server)

  try:
    archive(server, options.namespace, files, options.blacklist)
  except Error as e:
    # Surface archival errors through the option parser.
    parser.error(e.args[0])
  return 0
maruel@chromium.org	fb78d43	2013-08-28 21:22:40 +0000	[diff] [blame]	2155
				2156
				2157	def CMDdownload(parser, args):
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2158	"""Download data from the server.
				2159
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	2160	It can either download individual files or a complete tree from a .isolated
				2161	file.
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2162	"""
Marc-Antoine Ruel	8806e62	2014-02-12 14:15:53 -0500	[diff] [blame]	2163	add_isolate_server_options(parser, True)
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2164	parser.add_option(
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	2165	'-i', '--isolated', metavar='HASH',
				2166	help='hash of an isolated file, .isolated file content is discarded, use '
				2167	'--file if you need it')
				2168	parser.add_option(
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2169	'-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
				2170	help='hash and destination of a file, can be used multiple times')
				2171	parser.add_option(
				2172	'-t', '--target', metavar='DIR', default=os.getcwd(),
				2173	help='destination directory')
				2174	options, args = parser.parse_args(args)
Marc-Antoine Ruel	488ce8f	2014-02-09 11:25:04 -0500	[diff] [blame]	2175	process_isolate_server_options(parser, options)
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2176	if args:
				2177	parser.error('Unsupported arguments: %s' % args)
maruel@chromium.org	4f2ebe4	2013-09-19 13:09:08 +0000	[diff] [blame]	2178	if bool(options.isolated) == bool(options.file):
				2179	parser.error('Use one of --isolated or --file, and only one.')
maruel@chromium.org	b7e79a2	2013-09-13 01:24:56 +0000	[diff] [blame]	2180
				2181	options.target = os.path.abspath(options.target)
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	2182
Marc-Antoine Ruel	8806e62	2014-02-12 14:15:53 -0500	[diff] [blame]	2183	remote = options.isolate_server or options.indir
Vadim Shtayura	6b555c1	2014-07-23 16:22:18 -0700	[diff] [blame]	2184	if file_path.is_url(remote):
				2185	auth.ensure_logged_in(remote)
				2186
Marc-Antoine Ruel	8806e62	2014-02-12 14:15:53 -0500	[diff] [blame]	2187	with get_storage(remote, options.namespace) as storage:
Vadim Shtayura	3172be5	2013-12-03 12:49:05 -0800	[diff] [blame]	2188	# Fetching individual files.
				2189	if options.file:
				2190	channel = threading_utils.TaskChannel()
				2191	pending = {}
				2192	for digest, dest in options.file:
				2193	pending[digest] = dest
				2194	storage.async_fetch(
				2195	channel,
				2196	WorkerPool.MED,
				2197	digest,
				2198	UNKNOWN_FILE_SIZE,
				2199	functools.partial(file_write, os.path.join(options.target, dest)))
				2200	while pending:
				2201	fetched = channel.pull()
				2202	dest = pending.pop(fetched)
				2203	logging.info('%s: %s', fetched, dest)
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	2204
Vadim Shtayura	3172be5	2013-12-03 12:49:05 -0800	[diff] [blame]	2205	# Fetching whole isolated tree.
				2206	if options.isolated:
				2207	settings = fetch_isolated(
				2208	isolated_hash=options.isolated,
				2209	storage=storage,
				2210	cache=MemoryCache(),
Vadim Shtayura	3172be5	2013-12-03 12:49:05 -0800	[diff] [blame]	2211	outdir=options.target,
Vadim Shtayura	3172be5	2013-12-03 12:49:05 -0800	[diff] [blame]	2212	require_command=False)
				2213	rel = os.path.join(options.target, settings.relative_cwd)
				2214	print('To run this test please run from the directory %s:' %
				2215	os.path.join(options.target, rel))
				2216	print(' ' + ' '.join(settings.command))
vadimsh@chromium.org	7b5dae3	2013-10-03 16:59:59 +0000	[diff] [blame]	2217
maruel@chromium.org	fb78d43	2013-08-28 21:22:40 +0000	[diff] [blame]	2218	return 0
				2219
				2220
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDhashtable(parser, args):
  """Archives data to a hashtable on the file system.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  # Register the output directory flags plus the blacklist filter.
  add_outdir_options(parser)
  blacklist_help = (
      'List of regexp to use as blacklist filter when uploading '
      'directories')
  parser.add_option(
      '--blacklist',
      action='append',
      default=list(DEFAULT_BLACKLIST),
      help=blacklist_help)

  options, files = parser.parse_args(args)
  process_outdir_options(parser, options, os.getcwd())

  # Do not compress files when archiving to the file system.
  namespace = 'default'
  try:
    archive(options.outdir, namespace, files, options.blacklist)
  except Error as e:
    # Surface archival errors through the option parser.
    parser.error(e.args[0])
  return 0
				2247
				2248
def add_isolate_server_options(parser, add_indir):
  """Registers the isolate server selection flags on |parser|.

  -I/--isolate-server and --namespace are always added; --indir is added as
  well when |add_indir| is true.
  """
  # The environment provides the default server, if it defines one.
  env_server = os.environ.get('ISOLATE_SERVER', '')
  parser.add_option(
      '-I', '--isolate-server',
      metavar='URL',
      default=env_server,
      help='URL of the Isolate Server to use. Defaults to the environment '
           'variable ISOLATE_SERVER if set. No need to specify https://, this '
           'is assumed.')
  parser.add_option(
      '--namespace',
      default='default-gzip',
      help='The namespace to use on the Isolate Server, default: %default')
  if not add_indir:
    return
  parser.add_option(
      '--indir',
      metavar='DIR',
      help='Directory used to store the hashtable instead of using an '
           'isolate server.')
Marc-Antoine Ruel	1687b5e	2014-02-06 17:47:53 -0500	[diff] [blame]	2268
				2269
				2270	def process_isolate_server_options(parser, options):
Marc-Antoine Ruel	8806e62	2014-02-12 14:15:53 -0500	[diff] [blame]	2271	"""Processes the --isolate-server and --indir options and aborts if neither is
				2272	specified.
Marc-Antoine Ruel	1687b5e	2014-02-06 17:47:53 -0500	[diff] [blame]	2273	"""
Marc-Antoine Ruel	8806e62	2014-02-12 14:15:53 -0500	[diff] [blame]	2274	has_indir = hasattr(options, 'indir')
Marc-Antoine Ruel	1687b5e	2014-02-06 17:47:53 -0500	[diff] [blame]	2275	if not options.isolate_server:
Marc-Antoine Ruel	8806e62	2014-02-12 14:15:53 -0500	[diff] [blame]	2276	if not has_indir:
				2277	parser.error('--isolate-server is required.')
				2278	elif not options.indir:
				2279	parser.error('Use one of --indir or --isolate-server.')
				2280	else:
				2281	if has_indir and options.indir:
				2282	parser.error('Use only one of --indir or --isolate-server.')
				2283
				2284	if options.isolate_server:
				2285	parts = urlparse.urlparse(options.isolate_server, 'https')
Marc-Antoine Ruel	1687b5e	2014-02-06 17:47:53 -0500	[diff] [blame]	2286	if parts.query:
				2287	parser.error('--isolate-server doesn\'t support query parameter.')
				2288	if parts.fragment:
				2289	parser.error('--isolate-server doesn\'t support fragment in the url.')
Marc-Antoine Ruel	8806e62	2014-02-12 14:15:53 -0500	[diff] [blame]	2290	# urlparse('foo.com') will result in netloc='', path='foo.com', which is not
				2291	# what is desired here.
				2292	new = list(parts)
				2293	if not new[1] and new[2]:
				2294	new[1] = new[2].rstrip('/')
				2295	new[2] = ''
				2296	new[2] = new[2].rstrip('/')
				2297	options.isolate_server = urlparse.urlunparse(new)
Marc-Antoine Ruel	cfb6085	2014-07-02 15:22:00 -0400	[diff] [blame]	2298	on_error.report_on_exception_exit(options.isolate_server)
Marc-Antoine Ruel	8806e62	2014-02-12 14:15:53 -0500	[diff] [blame]	2299	return
				2300
				2301	if file_path.is_url(options.indir):
				2302	parser.error('Can\'t use an URL for --indir.')
				2303	options.indir = unicode(options.indir).replace('/', os.path.sep)
				2304	options.indir = os.path.abspath(
				2305	os.path.normpath(os.path.join(os.getcwd(), options.indir)))
				2306	if not os.path.isdir(options.indir):
				2307	parser.error('Path given to --indir must exist.')
				2308
Marc-Antoine Ruel	1687b5e	2014-02-06 17:47:53 -0500	[diff] [blame]	2309
				2310
def add_outdir_options(parser):
  """Registers -o/--outdir on parser; it is independent of --isolate-server.

  Note: uploading uses two distinct commands, 'archive' and 'hashtable',
  whereas 'download' is a single command able to fetch from either an isolate
  server or a file system.
  """
  outdir_kwargs = {
    'metavar': 'DIR',
    'help': 'Directory used to recreate the tree.',
  }
  parser.add_option('-o', '--outdir', **outdir_kwargs)
				2321
				2322
def process_outdir_options(parser, options, cwd):
  """Validates options.outdir and rewrites it as an absolute native path.

  parser.error() is called when --outdir is missing or is an URL. A relative
  value is resolved against cwd. The directory is not created here.
  """
  if not options.outdir:
    parser.error('--outdir is required.')
  if file_path.is_url(options.outdir):
    parser.error('Can\'t use an URL for --outdir.')
  options.outdir = unicode(options.outdir).replace('/', os.path.sep)
  # Native path case is not needed for outdir since tracing is never done from
  # there.
  joined = os.path.join(cwd, options.outdir)
  options.outdir = os.path.abspath(os.path.normpath(joined))
  # The directory could be created right away in theory; this is deferred in
  # case there are errors in the command line.
				2334
				2335
class OptionParserIsolateServer(tools.OptionParserWithLogging):
  """Option parser used by this script's commands.

  Extends tools.OptionParserWithLogging by registering the auth options at
  construction and processing them each time arguments are parsed.
  """

  def __init__(self, **kwargs):
    """Forwards kwargs to the base parser and adds the auth options.

    version is set to this module's __version__ and prog to the base name of
    this module's file.
    """
    tools.OptionParserWithLogging.__init__(
        self,
        version=__version__,
        prog=os.path.basename(sys.modules[__name__].__file__),
        **kwargs)
    auth.add_auth_options(self)

  def parse_args(self, args, *extra, **kwargs):
    """Parses args with the base class, then processes the auth options.

    Extra positional and keyword arguments (e.g. optparse's 'values') are
    passed through untouched to the base class. Previously the extra arguments
    were collected as '*kwargs', so only positional ones were accepted and any
    keyword argument raised TypeError.

    Returns:
      The (options, args) tuple returned by the base class.
    """
    options, args = tools.OptionParserWithLogging.parse_args(
        self, args, *extra, **kwargs)
    auth.process_auth_options(self, options)
    return options, args
				2350
				2351
				2352	def main(args):
				2353	dispatcher = subcommand.CommandDispatcher(__name__)
Marc-Antoine Ruel	cfb6085	2014-07-02 15:22:00 -0400	[diff] [blame]	2354	return dispatcher.execute(OptionParserIsolateServer(), args)
maruel@chromium.org	c6f9006	2012-11-07 18:32:22 +0000	[diff] [blame]	2355
				2356
				2357	if __name__ == '__main__':
maruel@chromium.org	fb78d43	2013-08-28 21:22:40 +0000	[diff] [blame]	2358	fix_encoding.fix_encoding()
				2359	tools.disable_buffering()
				2360	colorama.init()
maruel@chromium.org	cb3c3d5	2013-03-14 18:55:30 +0000	[diff] [blame]	2361	sys.exit(main(sys.argv[1:]))