blob: 1c04303a334a75e1cf994bd67823f5a5ec7535bb [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
maruelea586f32016-04-05 11:11:33 -07002# Copyright 2013 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07003# Use of this source code is governed under the Apache License, Version 2.0
4# that can be found in the LICENSE file.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00005
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04006"""Archives a set of files or directories to an Isolate Server."""
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00007
tansell26de79e2016-11-13 18:41:11 -08008__version__ = '0.7.0'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
Cory Massarocc19c8c2015-03-10 13:35:11 -070010import base64
nodir90bc8dc2016-06-15 13:35:21 -070011import errno
tansell9e04a8d2016-07-28 09:31:59 -070012import functools
13import io
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000014import logging
Marc-Antoine Ruela57d7db2014-10-15 20:31:19 -040015import optparse
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000016import os
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000017import re
Vadim Shtayuraf9e401b2014-10-15 18:19:37 +040018import signal
tansell9e04a8d2016-07-28 09:31:59 -070019import stat
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000020import sys
tansell26de79e2016-11-13 18:41:11 -080021import tarfile
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050022import tempfile
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000023import threading
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000024import time
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -050025import types
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000026import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000027
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000028from third_party import colorama
29from third_party.depot_tools import fix_encoding
30from third_party.depot_tools import subcommand
31
tanselle4288c32016-07-28 09:45:40 -070032from libs import arfile
Marc-Antoine Ruel37989932013-11-19 16:28:08 -050033from utils import file_path
maruel12e30012015-10-09 11:55:35 -070034from utils import fs
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040035from utils import logging_utils
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -040036from utils import lru
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000037from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040038from utils import on_error
maruel8e4e40c2016-05-30 06:21:07 -070039from utils import subprocess42
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000040from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000041from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000042
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080043import auth
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040044import isolated_format
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080045
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000046
# Version of the isolate protocol passed to the server in the /handshake
# request.
ISOLATE_PROTOCOL_VERSION = '1.0'


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# Maximum expected delay (in seconds) between successive file fetches or uploads
# in Storage. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# The number of files to check the isolate server per /pre-upload query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: larger the file -> larger the
# possibility it has changed). Then the first ITEMS_PER_CONTAINS_QUERIES[0]
# files are taken and sent to '/pre-upload', then the next
# ITEMS_PER_CONTAINS_QUERIES[1], and so on. The numbers here are a trade-off;
# the more per request, the lower the effect of HTTP round trip latency and
# TCP-level chattiness. On the other hand, larger values cause longer lookups,
# increasing the initial latency to start uploading, which is especially an
# issue for large files. This value is optimized for the "few thousands files
# to look up with minimal number of large files missing" case.
ITEMS_PER_CONTAINS_QUERIES = (20, 20, 50, 50, 50, 100)


# A list of already compressed extension types that should not receive any
# compression before being uploaded.
# NOTE(review): entries are bare extensions without a leading dot.
ALREADY_COMPRESSED_TYPES = [
    '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'mp4', 'pdf',
    'png', 'wav', 'zip',
]


# Chunk size to use when reading from a network stream.
NET_IO_FILE_CHUNK = 16 * 1024


# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout the whole download will be
# aborted.
DOWNLOAD_READ_TIMEOUT = 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30


# Regexps of paths that should be ignored when archiving by default.
DEFAULT_BLACKLIST = (
  # Temporary vim or python files.
  r'^.+\.(?:pyc|swp)$',
  # .git or .svn directory.
  r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
)


# A class to use to communicate with the server by default. Can be changed by
# 'set_storage_api_class'. Default is IsolateServer.
_storage_api_cls = None
110
111
class Error(Exception):
  """Base class for all runtime errors raised by this module."""
115
116
class Aborted(Error):
  """Raised when an operation is cancelled before it completes."""
120
121
class AlreadyExists(Error):
  """Raised when the destination file already exists on disk."""
125
def file_read(path, chunk_size=isolated_format.DISK_FILE_CHUNK, offset=0):
  """Reads the file at |path| and yields its content |chunk_size| bytes at a
  time, starting at byte |offset|.
  """
  with fs.open(path, 'rb') as stream:
    if offset:
      stream.seek(offset)
    while True:
      piece = stream.read(chunk_size)
      if not piece:
        return
      yield piece
136
137
def file_write(path, content_generator):
  """Writes the chunks produced by |content_generator| to the file at |path|.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  file_path.ensure_tree(os.path.dirname(path))
  written = 0
  with fs.open(path, 'wb') as out:
    for chunk in content_generator:
      written += len(chunk)
      out.write(chunk)
  return written
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000154
155
def fileobj_path(fileobj):
  """Returns the file system path backing a file-like object, or None.

  The returned path is guaranteed to exist and can be passed to file system
  operations like copy.
  """
  path = getattr(fileobj, 'name', None)
  if path is None:
    return None

  # A file created via something like open("test.txt") exposes its name as a
  # str (the standard library is outside our control). All paths handled here
  # must be unicode objects, so decode it.
  if not isinstance(path, unicode):
    path = path.decode(sys.getfilesystemencoding())

  # fs.exists requires an absolute path, otherwise it will fail with an
  # assertion error.
  if not os.path.isabs(path):
    return None

  if fs.exists(path):
    return path
  return None
180
181
# TODO(tansell): Replace fileobj_copy with shutil.copyfileobj once proper file
# wrappers have been created.
def fileobj_copy(
    dstfileobj, srcfileobj, size=-1,
    chunk_size=isolated_format.DISK_FILE_CHUNK):
  """Copies data from srcfileobj to dstfileobj.

  When |size| is given, exactly that many bytes are copied; running out of
  source data earlier raises IOError. With the default size of -1, everything
  up to the EOF marker is copied.
  """
  if size == -1 and hasattr(srcfileobj, 'tell'):
    if srcfileobj.tell() != 0:
      raise IOError('partial file but not using size')

  copied = 0
  while copied != size:
    to_read = min(chunk_size, size - copied) if size > 0 else chunk_size
    buf = srcfileobj.read(to_read)
    if not buf:
      if size == -1:
        break
      raise IOError('partial file, got %s, wanted %s' % (copied, size))
    dstfileobj.write(buf)
    copied += len(buf)
209
210
def putfile(srcfileobj, dstpath, file_mode=None, size=-1, use_symlink=False):
  """Put srcfileobj at the given dstpath with given mode.

  The function aims to do this as efficiently as possible while still allowing
  any possible file like object be given.

  Creating a tree of hardlinks has a few drawbacks:
  - tmpfs cannot be used for the scratch space. The tree has to be on the same
    partition as the cache.
  - involves a write to the inode, which advances ctime, cause a metadata
    writeback (causing disk seeking).
  - cache ctime cannot be used to detect modifications / corruption.
  - Some file systems (NTFS) have a 64k limit on the number of hardlink per
    partition. This is why the function automatically fallbacks to copying the
    file content.
  - /proc/sys/fs/protected_hardlinks causes an additional check to ensure the
    same owner is for all hardlinks.
  - Anecdotal report that ext2 is known to be potentially faulty on high rate
    of hardlink creation.

  Creating a tree of symlinks has a few drawbacks:
  - Tasks running the equivalent of os.path.realpath() will get the naked path
    and may fail.
  - Windows:
    - Symlinks are reparse points:
      https://msdn.microsoft.com/library/windows/desktop/aa365460.aspx
      https://msdn.microsoft.com/library/windows/desktop/aa363940.aspx
    - Symbolic links are Win32 paths, not NT paths.
      https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
    - Symbolic links are supported on Windows 7 and later only.
    - SeCreateSymbolicLinkPrivilege is needed, which is not present by
      default.
    - SeCreateSymbolicLinkPrivilege is *stripped off* by UAC when a restricted
      RID is present in the token;
      https://msdn.microsoft.com/en-us/library/bb530410.aspx
  """
  srcpath = fileobj_path(srcfileobj)
  if srcpath and size == -1:
    # The file is read only iff none of the user/group/other write bits are
    # set. BUG FIX: the previous expression was missing the negation, so
    # writable files were considered "read only" and linked into place (risking
    # cache corruption through later modification) while genuinely read-only
    # files were needlessly copied.
    readonly = file_mode is None or not (
        file_mode & (stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH))

    if readonly:
      # If the file is read only we can link the file
      if use_symlink:
        link_mode = file_path.SYMLINK_WITH_FALLBACK
      else:
        link_mode = file_path.HARDLINK_WITH_FALLBACK
    else:
      # If not read only, we must copy the file
      link_mode = file_path.COPY

    file_path.link_file(dstpath, srcpath, link_mode)
  else:
    # Need to write out the file
    with fs.open(dstpath, 'wb') as dstfileobj:
      fileobj_copy(dstfileobj, srcfileobj, size)

  assert fs.exists(dstpath)

  # file_mode of 0 is actually valid, so need explicit check.
  if file_mode is not None:
    fs.chmod(dstpath, file_mode)
273
274
def zip_compress(content_generator, level=7):
  """Yields zlib-compressed chunks for the data read from |content_generator|.

  |level| is the zlib compression level (0 = store, 9 = best).
  """
  deflater = zlib.compressobj(level)
  for piece in content_generator:
    out = deflater.compress(piece)
    if out:
      yield out
  trailing = deflater.flush(zlib.Z_FINISH)
  if trailing:
    yield trailing
285
286
def zip_decompress(
    content_generator, chunk_size=isolated_format.DISK_FILE_CHUNK):
  """Yields decompressed data for the zlib stream in |content_generator|.

  Decompresses in bounded steps (no output chunk larger than |chunk_size|) so
  that a zip bomb cannot force zlib to preallocate huge amounts of memory.

  Raises IOError if data is corrupted or incomplete.
  """
  inflater = zlib.decompressobj()
  seen = 0
  try:
    for piece in content_generator:
      seen += len(piece)
      out = inflater.decompress(piece, chunk_size)
      if out:
        yield out
      # Drain any input zlib held back to honor the chunk_size cap.
      while inflater.unconsumed_tail:
        out = inflater.decompress(inflater.unconsumed_tail, chunk_size)
        if out:
          yield out
    trailing = inflater.flush()
    if trailing:
      yield trailing
  except zlib.error as e:
    raise IOError(
        'Corrupted zip stream (read %d bytes) - %s' % (seen, e))
  # Ensure all data was read and decompressed.
  if inflater.unused_data or inflater.unconsumed_tail:
    raise IOError('Not all data was decompressed')
317
318
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (store) for already-compressed file types, 7 otherwise.
  """
  # BUG FIX: os.path.splitext() keeps the leading dot ('.zip') while entries in
  # ALREADY_COMPRESSED_TYPES are bare extensions ('zip'), so the membership
  # test below could never match and level 0 was never selected. Strip the dot.
  file_ext = os.path.splitext(filename)[1].lower().lstrip('.')
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
324
325
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files."""
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Gather every ancestor directory referenced by the file paths.
  needed = set()
  for f in files:
    ancestor = os.path.dirname(f)
    while ancestor and ancestor not in needed:
      needed.add(ancestor)
      ancestor = os.path.dirname(ancestor)
  # Sorted order guarantees parents are created before their children.
  for rel in sorted(needed):
    abs_dir = os.path.join(base_directory, rel)
    if not fs.isdir(abs_dir):
      fs.mkdir(abs_dir)
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000340
341
def create_symlinks(base_directory, files):
  """Creates any symlinks needed by the given set of files."""
  for filepath, properties in files:
    if 'l' not in properties:
      continue
    if sys.platform == 'win32':
      # TODO(maruel): Create symlink via the win32 api.
      logging.warning('Ignoring symlink %s', filepath)
      continue
    outfile = os.path.join(base_directory, filepath)
    try:
      os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    except OSError as e:
      if e.errno != errno.EEXIST:
        raise
      raise AlreadyExists('File %s already exists.' % outfile)
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000358
359
def is_valid_file(path, size):
  """Determines if the given files appears valid.

  Currently it just checks the file's size.
  """
  if size == UNKNOWN_FILE_SIZE:
    return fs.isfile(path)
  on_disk = fs.stat(path).st_size
  if on_disk == size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(path), on_disk, size)
  return False
374
375
class Item(object):
  """An item to push to Storage.

  Its digest and size may be provided in advance, if known. Otherwise they will
  be derived from content(). If digest is provided, it MUST correspond to
  hash algorithm used by Storage.

  When used with Storage, Item starts its life in a main thread, travels
  to 'contains' thread, then to 'push' thread and then finally back to
  the main thread. It is never used concurrently from multiple threads.
  """

  def __init__(self, digest=None, size=None, high_priority=False):
    # Content digest (hex string) or None until prepare() computes it.
    self.digest = digest
    # Content length in bytes or None until prepare() computes it.
    self.size = size
    # True to upload ahead of regular items.
    self.high_priority = high_priority
    # Default zlib compression level; subclasses may override per file type.
    self.compression_level = 6

  def content(self):
    """Iterable with content of this item as byte string (str) chunks."""
    raise NotImplementedError()

  def prepare(self, hash_algo):
    """Ensures self.digest and self.size are set.

    Uses content() as a source of data to calculate them. Does nothing if digest
    and size is already known.

    Arguments:
      hash_algo: hash algorithm to use to calculate digest.
    """
    if self.digest is not None and self.size is not None:
      return
    hasher = hash_algo()
    length = 0
    for chunk in self.content():
      hasher.update(chunk)
      length += len(chunk)
    self.digest = hasher.hexdigest()
    self.size = length
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000415
416
class FileItem(Item):
  """A file to push to Storage.

  Its digest and size may be provided in advance, if known. Otherwise they will
  be derived from the file content.
  """

  def __init__(self, path, digest=None, size=None, high_priority=False):
    # Stat the file only when the caller did not supply the size.
    if size is None:
      size = fs.stat(path).st_size
    super(FileItem, self).__init__(digest, size, high_priority)
    self.path = path
    self.compression_level = get_zip_compression_level(path)

  def content(self):
    return file_read(self.path)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000434
435
class BufferItem(Item):
  """An in-memory byte buffer to push to Storage."""

  def __init__(self, buf, high_priority=False):
    super(BufferItem, self).__init__(None, len(buf), high_priority)
    self.buffer = buf

  def content(self):
    # Deliberately a list (not a generator) so the content is re-iterable.
    return [self.buffer]
445
446
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000447class Storage(object):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800448 """Efficiently downloads or uploads large set of files via StorageApi.
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000449
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800450 Implements compression support, parallel 'contains' checks, parallel uploads
451 and more.
452
453 Works only within single namespace (and thus hashing algorithm and compression
454 scheme are fixed).
455
Vadim Shtayuraf9e401b2014-10-15 18:19:37 +0400456 Spawns multiple internal threads. Thread safe, but not fork safe. Modifies
457 signal handlers table to handle Ctrl+C.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800458 """
459
Vadim Shtayurae0ab1902014-04-29 10:55:27 -0700460 def __init__(self, storage_api):
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000461 self._storage_api = storage_api
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400462 self._use_zip = isolated_format.is_namespace_with_compression(
463 storage_api.namespace)
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -0400464 self._hash_algo = isolated_format.get_hash_algo(storage_api.namespace)
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000465 self._cpu_thread_pool = None
466 self._net_thread_pool = None
Vadim Shtayuraf9e401b2014-10-15 18:19:37 +0400467 self._aborted = False
468 self._prev_sig_handlers = {}
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000469
  @property
  def hash_algo(self):
    """Hashing algorithm used to name files in storage based on their content.

    Defined by |namespace|. See also isolated_format.get_hash_algo().
    """
    return self._hash_algo
477
  @property
  def location(self):
    """URL of the backing store that this class is using.

    Delegates to the underlying StorageApi instance.
    """
    return self._storage_api.location
482
  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    return self._storage_api.namespace
490
491 @property
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000492 def cpu_thread_pool(self):
493 """ThreadPool for CPU-bound tasks like zipping."""
494 if self._cpu_thread_pool is None:
Marc-Antoine Ruelbdad1182015-02-06 16:04:35 -0500495 threads = max(threading_utils.num_processors(), 2)
496 if sys.maxsize <= 2L**32:
497 # On 32 bits userland, do not try to use more than 16 threads.
498 threads = min(threads, 16)
499 self._cpu_thread_pool = threading_utils.ThreadPool(2, threads, 0, 'zip')
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000500 return self._cpu_thread_pool
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000501
  @property
  def net_thread_pool(self):
    """AutoRetryThreadPool for IO-bound tasks, retries IOError.

    Created lazily on first access.
    """
    if self._net_thread_pool is None:
      self._net_thread_pool = threading_utils.IOAutoRetryThreadPool()
    return self._net_thread_pool
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000508
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000509 def close(self):
510 """Waits for all pending tasks to finish."""
Vadim Shtayuraf9e401b2014-10-15 18:19:37 +0400511 logging.info('Waiting for all threads to die...')
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000512 if self._cpu_thread_pool:
513 self._cpu_thread_pool.join()
514 self._cpu_thread_pool.close()
515 self._cpu_thread_pool = None
516 if self._net_thread_pool:
517 self._net_thread_pool.join()
518 self._net_thread_pool.close()
519 self._net_thread_pool = None
Vadim Shtayuraf9e401b2014-10-15 18:19:37 +0400520 logging.info('Done.')
521
522 def abort(self):
523 """Cancels any pending or future operations."""
524 # This is not strictly theadsafe, but in the worst case the logging message
525 # will be printed twice. Not a big deal. In other places it is assumed that
526 # unprotected reads and writes to _aborted are serializable (it is true
527 # for python) and thus no locking is used.
528 if not self._aborted:
529 logging.warning('Aborting... It can take a while.')
530 self._aborted = True
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000531
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000532 def __enter__(self):
533 """Context manager interface."""
Vadim Shtayuraf9e401b2014-10-15 18:19:37 +0400534 assert not self._prev_sig_handlers, self._prev_sig_handlers
535 for s in (signal.SIGINT, signal.SIGTERM):
536 self._prev_sig_handlers[s] = signal.signal(s, lambda *_args: self.abort())
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000537 return self
538
539 def __exit__(self, _exc_type, _exc_value, _traceback):
540 """Context manager interface."""
541 self.close()
Vadim Shtayuraf9e401b2014-10-15 18:19:37 +0400542 while self._prev_sig_handlers:
543 s, h = self._prev_sig_handlers.popitem()
544 signal.signal(s, h)
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000545 return False
546
  def upload_items(self, items):
    """Uploads a bunch of items to the isolate server.

    It figures out what items are missing from the server and uploads only them.

    Arguments:
      items: list of Item instances that represents data to upload.

    Returns:
      List of items that were uploaded. All other items are already there.
    """
    logging.info('upload_items(items=%d)', len(items))

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # For each digest keep only first Item that matches it. All other items
    # are just indistinguishable copies from the point of view of isolate
    # server (it doesn't care about paths at all, only content and digests).
    seen = {}
    duplicates = 0
    for item in items:
      if seen.setdefault(item.digest, item) is not item:
        duplicates += 1
    items = seen.values()
    if duplicates:
      logging.info('Skipped %d files with duplicated content', duplicates)

    # Enqueue all upload tasks.
    missing = set()
    uploaded = []
    channel = threading_utils.TaskChannel()
    for missing_item, push_state in self.get_missing_items(items):
      missing.add(missing_item)
      self.async_push(channel, missing_item, push_state)

    # No need to spawn deadlock detector thread if there's nothing to upload.
    if missing:
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        # Wait for all started uploads to finish.
        while len(uploaded) != len(missing):
          detector.ping()
          item = channel.pull()
          uploaded.append(item)
          logging.debug(
              'Uploaded %d / %d: %s', len(uploaded), len(missing), item.digest)
      logging.info('All files are uploaded')

    # Print stats.
    # NOTE(review): an empty |items| list would make total == 0 and raise
    # ZeroDivisionError in the percentage computations below — confirm callers
    # never pass an empty list.
    total = len(items)
    total_size = sum(f.size for f in items)
    logging.info(
        'Total: %6d, %9.1fkb',
        total,
        total_size / 1024.)
    cache_hit = set(items) - missing
    cache_hit_size = sum(f.size for f in cache_hit)
    logging.info(
        'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_hit),
        cache_hit_size / 1024.,
        len(cache_hit) * 100. / total,
        cache_hit_size * 100. / total_size if total_size else 0)
    cache_miss = missing
    cache_miss_size = sum(f.size for f in cache_miss)
    logging.info(
        'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_miss),
        cache_miss_size / 1024.,
        len(cache_miss) * 100. / total,
        cache_miss_size * 100. / total_size if total_size else 0)

    return uploaded
621
  def async_push(self, channel, item, push_state):
    """Starts asynchronous push to the server in a parallel thread.

    Can be used only after |item| was checked for presence on a server with
    'get_missing_items' call. 'get_missing_items' returns |push_state| object
    that contains storage specific information describing how to upload
    the item (for example in case of cloud storage, it is signed upload URLs).

    Arguments:
      channel: TaskChannel that receives back |item| when upload ends.
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      None, but |channel| later receives back |item| when upload ends.
    """
    # Thread pool task priority.
    priority = (
        threading_utils.PRIORITY_HIGH if item.high_priority
        else threading_utils.PRIORITY_MED)

    def push(content):
      """Pushes an Item and returns it to |channel|."""
      if self._aborted:
        raise Aborted()
      item.prepare(self._hash_algo)
      self._storage_api.push(item, push_state, content)
      return item

    # If zipping is not required, just start a push task.
    if not self._use_zip:
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, item.content())
      return

    # If zipping is enabled, zip in a separate thread.
    def zip_and_push():
      # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
      # content right here. It will block until all file is zipped.
      try:
        if self._aborted:
          raise Aborted()
        stream = zip_compress(item.content(), item.compression_level)
        data = ''.join(stream)
      except Exception as exc:
        logging.error('Failed to zip \'%s\': %s', item, exc)
        # Propagate the zip failure to whoever is pulling from |channel|.
        channel.send_exception()
        return
      # Hand the compressed payload off to the network pool for upload.
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, [data])
    self.cpu_thread_pool.add_task(priority, zip_and_push)
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000673
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800674 def push(self, item, push_state):
675 """Synchronously pushes a single item to the server.
676
677 If you need to push many items at once, consider using 'upload_items' or
678 'async_push' with instance of TaskChannel.
679
680 Arguments:
681 item: item to upload as instance of Item class.
682 push_state: push state returned by 'get_missing_items' call for |item|.
683
684 Returns:
685 Pushed item (same object as |item|).
686 """
687 channel = threading_utils.TaskChannel()
Vadim Shtayura3148e072014-09-02 18:51:52 -0700688 with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800689 self.async_push(channel, item, push_state)
690 pushed = channel.pull()
691 assert pushed is item
692 return item
693
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +0000694 def async_fetch(self, channel, priority, digest, size, sink):
695 """Starts asynchronous fetch from the server in a parallel thread.
696
697 Arguments:
698 channel: TaskChannel that receives back |digest| when download ends.
699 priority: thread pool task priority for the fetch.
700 digest: hex digest of an item to download.
701 size: expected size of the item (after decompression).
702 sink: function that will be called as sink(generator).
703 """
704 def fetch():
705 try:
706 # Prepare reading pipeline.
707 stream = self._storage_api.fetch(digest)
Vadim Shtayurae0ab1902014-04-29 10:55:27 -0700708 if self._use_zip:
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -0400709 stream = zip_decompress(stream, isolated_format.DISK_FILE_CHUNK)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +0000710 # Run |stream| through verifier that will assert its size.
711 verifier = FetchStreamVerifier(stream, size)
712 # Verified stream goes to |sink|.
713 sink(verifier.run())
714 except Exception as err:
Vadim Shtayura0ffc4092013-11-20 17:49:52 -0800715 logging.error('Failed to fetch %s: %s', digest, err)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +0000716 raise
717 return digest
718
719 # Don't bother with zip_thread_pool for decompression. Decompression is
720 # really fast and most probably IO bound anyway.
721 self.net_thread_pool.add_task_with_channel(channel, priority, fetch)
722
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000723 def get_missing_items(self, items):
724 """Yields items that are missing from the server.
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000725
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000726 Issues multiple parallel queries via StorageApi's 'contains' method.
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000727
728 Arguments:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000729 items: a list of Item objects to check.
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000730
731 Yields:
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800732 For each missing item it yields a pair (item, push_state), where:
733 * item - Item object that is missing (one of |items|).
734 * push_state - opaque object that contains storage specific information
735 describing how to upload the item (for example in case of cloud
736 storage, it is signed upload URLs). It can later be passed to
737 'async_push'.
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000738 """
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000739 channel = threading_utils.TaskChannel()
740 pending = 0
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800741
742 # Ensure all digests are calculated.
743 for item in items:
Vadim Shtayurae0ab1902014-04-29 10:55:27 -0700744 item.prepare(self._hash_algo)
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800745
Vadim Shtayuraf9e401b2014-10-15 18:19:37 +0400746 def contains(batch):
747 if self._aborted:
748 raise Aborted()
749 return self._storage_api.contains(batch)
750
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000751 # Enqueue all requests.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800752 for batch in batch_items_for_check(items):
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400753 self.net_thread_pool.add_task_with_channel(
Vadim Shtayuraf9e401b2014-10-15 18:19:37 +0400754 channel, threading_utils.PRIORITY_HIGH, contains, batch)
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000755 pending += 1
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800756
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000757 # Yield results as they come in.
758 for _ in xrange(pending):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800759 for missing_item, push_state in channel.pull().iteritems():
760 yield missing_item, push_state
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000761
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000762
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800763def batch_items_for_check(items):
764 """Splits list of items to check for existence on the server into batches.
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000765
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800766 Each batch corresponds to a single 'exists?' query to the server via a call
767 to StorageApi's 'contains' method.
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000768
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800769 Arguments:
770 items: a list of Item objects.
771
772 Yields:
773 Batches of items to query for existence in a single operation,
774 each batch is a list of Item objects.
775 """
776 batch_count = 0
777 batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[0]
778 next_queries = []
779 for item in sorted(items, key=lambda x: x.size, reverse=True):
780 next_queries.append(item)
781 if len(next_queries) == batch_size_limit:
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000782 yield next_queries
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800783 next_queries = []
784 batch_count += 1
785 batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[
786 min(batch_count, len(ITEMS_PER_CONTAINS_QUERIES) - 1)]
787 if next_queries:
788 yield next_queries
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000789
790
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +0000791class FetchQueue(object):
792 """Fetches items from Storage and places them into LocalCache.
793
794 It manages multiple concurrent fetch operations. Acts as a bridge between
795 Storage and LocalCache so that Storage and LocalCache don't depend on each
796 other at all.
797 """
798
799 def __init__(self, storage, cache):
800 self.storage = storage
801 self.cache = cache
802 self._channel = threading_utils.TaskChannel()
803 self._pending = set()
804 self._accessed = set()
805 self._fetched = cache.cached_set()
806
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -0400807 def add(
Vadim Shtayura3148e072014-09-02 18:51:52 -0700808 self,
809 digest,
810 size=UNKNOWN_FILE_SIZE,
811 priority=threading_utils.PRIORITY_MED):
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +0000812 """Starts asynchronous fetch of item |digest|."""
813 # Fetching it now?
814 if digest in self._pending:
815 return
816
817 # Mark this file as in use, verify_all_cached will later ensure it is still
818 # in cache.
819 self._accessed.add(digest)
820
821 # Already fetched? Notify cache to update item's LRU position.
822 if digest in self._fetched:
823 # 'touch' returns True if item is in cache and not corrupted.
824 if self.cache.touch(digest, size):
825 return
826 # Item is corrupted, remove it from cache and fetch it again.
827 self._fetched.remove(digest)
828 self.cache.evict(digest)
829
830 # TODO(maruel): It should look at the free disk space, the current cache
831 # size and the size of the new item on every new item:
832 # - Trim the cache as more entries are listed when free disk space is low,
833 # otherwise if the amount of data downloaded during the run > free disk
834 # space, it'll crash.
835 # - Make sure there's enough free disk space to fit all dependencies of
836 # this run! If not, abort early.
837
838 # Start fetching.
839 self._pending.add(digest)
840 self.storage.async_fetch(
841 self._channel, priority, digest, size,
842 functools.partial(self.cache.write, digest))
843
844 def wait(self, digests):
845 """Starts a loop that waits for at least one of |digests| to be retrieved.
846
847 Returns the first digest retrieved.
848 """
849 # Flush any already fetched items.
850 for digest in digests:
851 if digest in self._fetched:
852 return digest
853
854 # Ensure all requested items are being fetched now.
855 assert all(digest in self._pending for digest in digests), (
856 digests, self._pending)
857
858 # Wait for some requested item to finish fetching.
859 while self._pending:
860 digest = self._channel.pull()
861 self._pending.remove(digest)
862 self._fetched.add(digest)
863 if digest in digests:
864 return digest
865
866 # Should never reach this point due to assert above.
867 raise RuntimeError('Impossible state')
868
869 def inject_local_file(self, path, algo):
870 """Adds local file to the cache as if it was fetched from storage."""
maruel12e30012015-10-09 11:55:35 -0700871 with fs.open(path, 'rb') as f:
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +0000872 data = f.read()
873 digest = algo(data).hexdigest()
874 self.cache.write(digest, [data])
875 self._fetched.add(digest)
876 return digest
877
878 @property
879 def pending_count(self):
880 """Returns number of items to be fetched."""
881 return len(self._pending)
882
883 def verify_all_cached(self):
884 """True if all accessed items are in cache."""
885 return self._accessed.issubset(self.cache.cached_set())
886
887
class FetchStreamVerifier(object):
  """Verifies that fetched file is valid before passing it to the LocalCache."""

  def __init__(self, stream, expected_size):
    # A stream must be provided; an expected size of UNKNOWN_FILE_SIZE
    # disables the final size check.
    assert stream is not None
    self.stream = stream
    self.expected_size = expected_size
    self.current_size = 0

  def run(self):
    """Generator that yields same items as |stream|.

    Verifies |stream| is complete before yielding a last chunk to consumer.

    Also wraps IOError produced by consumer into MappingError exceptions since
    otherwise Storage will retry fetch on unrelated local cache errors.
    """
    # Keep one chunk buffered in |pending| so the final chunk is handed out
    # only once the whole stream has been validated.
    pending = None
    for chunk in self.stream:
      assert chunk is not None
      if pending is not None:
        self._inspect_chunk(pending, is_last=False)
        try:
          yield pending
        except IOError as exc:
          raise isolated_format.MappingError(
              'Failed to store an item in cache: %s' % exc)
      pending = chunk
    if pending is not None:
      self._inspect_chunk(pending, is_last=True)
      try:
        yield pending
      except IOError as exc:
        raise isolated_format.MappingError(
            'Failed to store an item in cache: %s' % exc)

  def _inspect_chunk(self, chunk, is_last):
    """Called for each fetched chunk before passing it to consumer."""
    self.current_size += len(chunk)
    if is_last and self.expected_size not in (
        UNKNOWN_FILE_SIZE, self.current_size):
      raise IOError('Incorrect file size: expected %d, got %d' % (
          self.expected_size, self.current_size))
935
936
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000937class StorageApi(object):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800938 """Interface for classes that implement low-level storage operations.
939
940 StorageApi is oblivious of compression and hashing scheme used. This details
941 are handled in higher level Storage class.
942
943 Clients should generally not use StorageApi directly. Storage class is
944 preferred since it implements compression and upload optimizations.
945 """
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000946
Vadim Shtayurae0ab1902014-04-29 10:55:27 -0700947 @property
948 def location(self):
Marc-Antoine Ruelb10edf22014-12-11 13:33:57 -0500949 """URL of the backing store that this class is using."""
Vadim Shtayurae0ab1902014-04-29 10:55:27 -0700950 raise NotImplementedError()
951
952 @property
953 def namespace(self):
954 """Isolate namespace used by this storage.
955
956 Indirectly defines hashing scheme and compression method used.
957 """
958 raise NotImplementedError()
959
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -0800960 def fetch(self, digest, offset=0):
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000961 """Fetches an object and yields its content.
962
963 Arguments:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000964 digest: hash digest of item to download.
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -0800965 offset: offset (in bytes) from the start of the file to resume fetch from.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000966
967 Yields:
968 Chunks of downloaded item (as str objects).
969 """
970 raise NotImplementedError()
971
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800972 def push(self, item, push_state, content=None):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000973 """Uploads an |item| with content generated by |content| generator.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000974
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800975 |item| MUST go through 'contains' call to get |push_state| before it can
976 be pushed to the storage.
977
978 To be clear, here is one possible usage:
979 all_items = [... all items to push as Item subclasses ...]
980 for missing_item, push_state in storage_api.contains(all_items).items():
981 storage_api.push(missing_item, push_state)
982
983 When pushing to a namespace with compression, data that should be pushed
984 and data provided by the item is not the same. In that case |content| is
985 not None and it yields chunks of compressed data (using item.content() as
986 a source of original uncompressed data). This is implemented by Storage
987 class.
988
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000989 Arguments:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000990 item: Item object that holds information about an item being pushed.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800991 push_state: push state object as returned by 'contains' call.
992 content: a generator that yields chunks to push, item.content() if None.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000993
994 Returns:
995 None.
996 """
997 raise NotImplementedError()
998
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000999 def contains(self, items):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001000 """Checks for |items| on the server, prepares missing ones for upload.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001001
1002 Arguments:
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001003 items: list of Item objects to check for presence.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001004
1005 Returns:
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001006 A dict missing Item -> opaque push state object to be passed to 'push'.
1007 See doc string for 'push'.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001008 """
1009 raise NotImplementedError()
1010
1011
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001012class _IsolateServerPushState(object):
1013 """Per-item state passed from IsolateServer.contains to IsolateServer.push.
Mike Frysinger27f03da2014-02-12 16:47:01 -05001014
1015 Note this needs to be a global class to support pickling.
1016 """
1017
Cory Massarocc19c8c2015-03-10 13:35:11 -07001018 def __init__(self, preupload_status, size):
1019 self.preupload_status = preupload_status
1020 gs_upload_url = preupload_status.get('gs_upload_url') or None
1021 if gs_upload_url:
1022 self.upload_url = gs_upload_url
maruel380e3262016-08-31 16:10:06 -07001023 self.finalize_url = 'api/isolateservice/v1/finalize_gs_upload'
Cory Massarocc19c8c2015-03-10 13:35:11 -07001024 else:
maruel380e3262016-08-31 16:10:06 -07001025 self.upload_url = 'api/isolateservice/v1/store_inline'
Cory Massarocc19c8c2015-03-10 13:35:11 -07001026 self.finalize_url = None
Mike Frysinger27f03da2014-02-12 16:47:01 -05001027 self.uploaded = False
1028 self.finalized = False
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -05001029 self.size = size
Mike Frysinger27f03da2014-02-12 16:47:01 -05001030
1031
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001032class IsolateServer(StorageApi):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001033 """StorageApi implementation that downloads and uploads to Isolate Server.
1034
1035 It uploads and downloads directly from Google Storage whenever appropriate.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001036 Works only within single namespace.
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001037 """
1038
maruel@chromium.org3e42ce82013-09-12 18:36:59 +00001039 def __init__(self, base_url, namespace):
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001040 super(IsolateServer, self).__init__()
Marc-Antoine Ruelb10edf22014-12-11 13:33:57 -05001041 assert file_path.is_url(base_url), base_url
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001042 self._base_url = base_url.rstrip('/')
1043 self._namespace = namespace
Cory Massarocc19c8c2015-03-10 13:35:11 -07001044 self._namespace_dict = {
1045 'compression': 'flate' if namespace.endswith(
1046 ('-gzip', '-flate')) else '',
1047 'digest_hash': 'sha-1',
1048 'namespace': namespace,
1049 }
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001050 self._lock = threading.Lock()
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001051 self._server_caps = None
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -05001052 self._memory_use = 0
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001053
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001054 @property
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001055 def _server_capabilities(self):
Cory Massarocc19c8c2015-03-10 13:35:11 -07001056 """Gets server details.
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001057
1058 Returns:
Cory Massarocc19c8c2015-03-10 13:35:11 -07001059 Server capabilities dictionary as returned by /server_details endpoint.
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001060 """
maruel@chromium.org3e42ce82013-09-12 18:36:59 +00001061 # TODO(maruel): Make this request much earlier asynchronously while the
1062 # files are being enumerated.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001063
1064 # TODO(vadimsh): Put |namespace| in the URL so that server can apply
1065 # namespace-level ACLs to this call.
Cory Massarocc19c8c2015-03-10 13:35:11 -07001066
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001067 with self._lock:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001068 if self._server_caps is None:
Cory Massarocc19c8c2015-03-10 13:35:11 -07001069 self._server_caps = net.url_read_json(
maruel380e3262016-08-31 16:10:06 -07001070 url='%s/api/isolateservice/v1/server_details' % self._base_url,
Cory Massarocc19c8c2015-03-10 13:35:11 -07001071 data={})
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001072 return self._server_caps
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001073
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001074 @property
1075 def location(self):
1076 return self._base_url
1077
  @property
  def namespace(self):
    """Isolate namespace used by this storage (see StorageApi.namespace)."""
    return self._namespace
1081
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001082 def fetch(self, digest, offset=0):
Cory Massarocc19c8c2015-03-10 13:35:11 -07001083 assert offset >= 0
maruel380e3262016-08-31 16:10:06 -07001084 source_url = '%s/api/isolateservice/v1/retrieve' % (
Cory Massarocc19c8c2015-03-10 13:35:11 -07001085 self._base_url)
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001086 logging.debug('download_file(%s, %d)', source_url, offset)
Cory Massarocc19c8c2015-03-10 13:35:11 -07001087 response = self.do_fetch(source_url, digest, offset)
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001088
Cory Massarocc19c8c2015-03-10 13:35:11 -07001089 if not response:
maruele154f9c2015-09-14 11:03:15 -07001090 raise IOError(
1091 'Attempted to fetch from %s; no data exist: %s / %s.' % (
1092 source_url, self._namespace, digest))
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001093
Cory Massarocc19c8c2015-03-10 13:35:11 -07001094 # for DB uploads
1095 content = response.get('content')
1096 if content is not None:
maruel863ac262016-03-17 11:00:37 -07001097 yield base64.b64decode(content)
1098 return
Cory Massarocc19c8c2015-03-10 13:35:11 -07001099
1100 # for GS entities
1101 connection = net.url_open(response['url'])
maruelf5574752015-09-17 13:40:27 -07001102 if not connection:
1103 raise IOError('Failed to download %s / %s' % (self._namespace, digest))
Cory Massarocc19c8c2015-03-10 13:35:11 -07001104
1105 # If |offset|, verify server respects it by checking Content-Range.
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001106 if offset:
1107 content_range = connection.get_header('Content-Range')
1108 if not content_range:
1109 raise IOError('Missing Content-Range header')
1110
1111 # 'Content-Range' format is 'bytes <offset>-<last_byte_index>/<size>'.
1112 # According to a spec, <size> can be '*' meaning "Total size of the file
1113 # is not known in advance".
1114 try:
1115 match = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', content_range)
1116 if not match:
1117 raise ValueError()
1118 content_offset = int(match.group(1))
1119 last_byte_index = int(match.group(2))
1120 size = None if match.group(3) == '*' else int(match.group(3))
1121 except ValueError:
1122 raise IOError('Invalid Content-Range header: %s' % content_range)
1123
1124 # Ensure returned offset equals requested one.
1125 if offset != content_offset:
1126 raise IOError('Expecting offset %d, got %d (Content-Range is %s)' % (
1127 offset, content_offset, content_range))
1128
1129 # Ensure entire tail of the file is returned.
1130 if size is not None and last_byte_index + 1 != size:
1131 raise IOError('Incomplete response. Content-Range: %s' % content_range)
1132
maruel863ac262016-03-17 11:00:37 -07001133 for data in connection.iter_content(NET_IO_FILE_CHUNK):
1134 yield data
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001135
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001136 def push(self, item, push_state, content=None):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001137 assert isinstance(item, Item)
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001138 assert item.digest is not None
1139 assert item.size is not None
1140 assert isinstance(push_state, _IsolateServerPushState)
1141 assert not push_state.finalized
1142
1143 # Default to item.content().
1144 content = item.content() if content is None else content
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -05001145 logging.info('Push state size: %d', push_state.size)
1146 if isinstance(content, (basestring, list)):
1147 # Memory is already used, too late.
1148 with self._lock:
1149 self._memory_use += push_state.size
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001150 else:
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -05001151 # TODO(vadimsh): Do not read from |content| generator when retrying push.
1152 # If |content| is indeed a generator, it can not be re-winded back to the
1153 # beginning of the stream. A retry will find it exhausted. A possible
1154 # solution is to wrap |content| generator with some sort of caching
1155 # restartable generator. It should be done alongside streaming support
1156 # implementation.
1157 #
1158 # In theory, we should keep the generator, so that it is not serialized in
1159 # memory. Sadly net.HttpService.request() requires the body to be
1160 # serialized.
1161 assert isinstance(content, types.GeneratorType), repr(content)
1162 slept = False
1163 # HACK HACK HACK. Please forgive me for my sins but OMG, it works!
Marc-Antoine Ruele6677c82015-02-05 14:54:22 -05001164 # One byte less than 512mb. This is to cope with incompressible content.
1165 max_size = int(sys.maxsize * 0.25)
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -05001166 while True:
1167 with self._lock:
1168 # This is due to 32 bits python when uploading very large files. The
1169 # problem is that it's comparing uncompressed sizes, while we care
1170 # about compressed sizes since it's what is serialized in memory.
1171 # The first check assumes large files are compressible and that by
1172 # throttling one upload at once, we can survive. Otherwise, kaboom.
1173 memory_use = self._memory_use
1174 if ((push_state.size >= max_size and not memory_use) or
1175 (memory_use + push_state.size <= max_size)):
1176 self._memory_use += push_state.size
1177 memory_use = self._memory_use
1178 break
1179 time.sleep(0.1)
1180 slept = True
1181 if slept:
1182 logging.info('Unblocked: %d %d', memory_use, push_state.size)
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001183
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -05001184 try:
1185 # This push operation may be a retry after failed finalization call below,
1186 # no need to reupload contents in that case.
1187 if not push_state.uploaded:
1188 # PUT file to |upload_url|.
Cory Massarocc19c8c2015-03-10 13:35:11 -07001189 success = self.do_push(push_state, content)
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -05001190 if not success:
Cory Massarocc19c8c2015-03-10 13:35:11 -07001191 raise IOError('Failed to upload file with hash %s to URL %s' % (
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -05001192 item.digest, push_state.upload_url))
1193 push_state.uploaded = True
1194 else:
1195 logging.info(
1196 'A file %s already uploaded, retrying finalization only',
1197 item.digest)
1198
1199 # Optionally notify the server that it's done.
1200 if push_state.finalize_url:
1201 # TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
1202 # send it to isolated server. That way isolate server can verify that
1203 # the data safely reached Google Storage (GS provides MD5 and CRC32C of
1204 # stored files).
1205 # TODO(maruel): Fix the server to accept properly data={} so
1206 # url_read_json() can be used.
Cory Massarocc19c8c2015-03-10 13:35:11 -07001207 response = net.url_read_json(
1208 url='%s/%s' % (self._base_url, push_state.finalize_url),
1209 data={
1210 'upload_ticket': push_state.preupload_status['upload_ticket'],
1211 })
1212 if not response or not response['ok']:
1213 raise IOError('Failed to finalize file with hash %s.' % item.digest)
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -05001214 push_state.finalized = True
1215 finally:
1216 with self._lock:
1217 self._memory_use -= push_state.size
maruel@chromium.orgd1e20c92013-09-17 20:54:26 +00001218
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001219 def contains(self, items):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001220 # Ensure all items were initialized with 'prepare' call. Storage does that.
1221 assert all(i.digest is not None and i.size is not None for i in items)
1222
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001223 # Request body is a json encoded list of dicts.
Cory Massarocc19c8c2015-03-10 13:35:11 -07001224 body = {
1225 'items': [
1226 {
1227 'digest': item.digest,
1228 'is_isolated': bool(item.high_priority),
1229 'size': item.size,
1230 } for item in items
1231 ],
1232 'namespace': self._namespace_dict,
1233 }
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001234
maruel380e3262016-08-31 16:10:06 -07001235 query_url = '%s/api/isolateservice/v1/preupload' % self._base_url
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001236
1237 # Response body is a list of push_urls (or null if file is already present).
Marc-Antoine Ruel0a620612014-08-13 15:47:07 -04001238 response = None
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001239 try:
Marc-Antoine Ruel0a620612014-08-13 15:47:07 -04001240 response = net.url_read_json(url=query_url, data=body)
1241 if response is None:
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04001242 raise isolated_format.MappingError(
Cory Massarocc19c8c2015-03-10 13:35:11 -07001243 'Failed to execute preupload query')
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001244 except ValueError as err:
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04001245 raise isolated_format.MappingError(
Marc-Antoine Ruel0a620612014-08-13 15:47:07 -04001246 'Invalid response from server: %s, body is %s' % (err, response))
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001247
1248 # Pick Items that are missing, attach _PushState to them.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001249 missing_items = {}
Cory Massarocc19c8c2015-03-10 13:35:11 -07001250 for preupload_status in response.get('items', []):
1251 assert 'upload_ticket' in preupload_status, (
1252 preupload_status, '/preupload did not generate an upload ticket')
1253 index = int(preupload_status['index'])
1254 missing_items[items[index]] = _IsolateServerPushState(
1255 preupload_status, items[index].size)
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001256 logging.info('Queried %d files, %d cache hit',
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001257 len(items), len(items) - len(missing_items))
1258 return missing_items
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001259
Cory Massarocc19c8c2015-03-10 13:35:11 -07001260 def do_fetch(self, url, digest, offset):
Vadim Shtayura8623c272014-12-01 11:45:27 -08001261 """Fetches isolated data from the URL.
1262
1263 Used only for fetching files, not for API calls. Can be overridden in
1264 subclasses.
1265
1266 Args:
1267 url: URL to fetch the data from, can possibly return http redirect.
1268 offset: byte offset inside the file to start fetching from.
1269
1270 Returns:
1271 net.HttpResponse compatible object, with 'read' and 'get_header' calls.
1272 """
Cory Massarocc19c8c2015-03-10 13:35:11 -07001273 assert isinstance(offset, int)
1274 data = {
1275 'digest': digest.encode('utf-8'),
1276 'namespace': self._namespace_dict,
1277 'offset': offset,
1278 }
maruel0c25f4f2015-12-15 05:41:17 -08001279 # TODO(maruel): url + '?' + urllib.urlencode(data) once a HTTP GET endpoint
1280 # is added.
Cory Massarocc19c8c2015-03-10 13:35:11 -07001281 return net.url_read_json(
1282 url=url,
1283 data=data,
1284 read_timeout=DOWNLOAD_READ_TIMEOUT)
Vadim Shtayura8623c272014-12-01 11:45:27 -08001285
Cory Massarocc19c8c2015-03-10 13:35:11 -07001286 def do_push(self, push_state, content):
Vadim Shtayura8623c272014-12-01 11:45:27 -08001287 """Uploads isolated file to the URL.
1288
1289 Used only for storing files, not for API calls. Can be overridden in
1290 subclasses.
1291
1292 Args:
1293 url: URL to upload the data to.
Cory Massarocc19c8c2015-03-10 13:35:11 -07001294 push_state: an _IsolateServicePushState instance
1295 item: the original Item to be uploaded
Vadim Shtayura8623c272014-12-01 11:45:27 -08001296 content: an iterable that yields 'str' chunks.
Vadim Shtayura8623c272014-12-01 11:45:27 -08001297 """
1298 # A cheezy way to avoid memcpy of (possibly huge) file, until streaming
1299 # upload support is implemented.
1300 if isinstance(content, list) and len(content) == 1:
1301 content = content[0]
1302 else:
1303 content = ''.join(content)
Cory Massarocc19c8c2015-03-10 13:35:11 -07001304
1305 # DB upload
1306 if not push_state.finalize_url:
1307 url = '%s/%s' % (self._base_url, push_state.upload_url)
1308 content = base64.b64encode(content)
1309 data = {
1310 'upload_ticket': push_state.preupload_status['upload_ticket'],
1311 'content': content,
1312 }
1313 response = net.url_read_json(url=url, data=data)
1314 return response is not None and response['ok']
1315
1316 # upload to GS
1317 url = push_state.upload_url
Vadim Shtayura8623c272014-12-01 11:45:27 -08001318 response = net.url_read(
Cory Massarocc19c8c2015-03-10 13:35:11 -07001319 content_type='application/octet-stream',
1320 data=content,
1321 method='PUT',
tandriib44d54d2016-02-10 11:31:41 -08001322 headers={'Cache-Control': 'public, max-age=31536000'},
Cory Massarocc19c8c2015-03-10 13:35:11 -07001323 url=url)
Vadim Shtayura8623c272014-12-01 11:45:27 -08001324 return response is not None
1325
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001326
nodir445097b2016-06-03 22:50:26 -07001327class CacheMiss(Exception):
1328 """Raised when an item is not in cache."""
1329
1330 def __init__(self, digest):
1331 self.digest = digest
1332 super(CacheMiss, self).__init__(
1333 'Item with digest %r is not found in cache' % digest)
1334
1335
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001336class LocalCache(object):
1337 """Local cache that stores objects fetched via Storage.
1338
1339 It can be accessed concurrently from multiple threads, so it should protect
1340 its internal state with some lock.
1341 """
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -05001342 cache_dir = None
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001343
maruel064c0a32016-04-05 11:47:15 -07001344 def __init__(self):
1345 self._lock = threading_utils.LockWithAssert()
1346 # Profiling values.
1347 self._added = []
1348 self._initial_number_items = 0
1349 self._initial_size = 0
1350 self._evicted = []
tansell9e04a8d2016-07-28 09:31:59 -07001351 self._used = []
maruel064c0a32016-04-05 11:47:15 -07001352
nodirbe642ff2016-06-09 15:51:51 -07001353 def __contains__(self, digest):
1354 raise NotImplementedError()
1355
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001356 def __enter__(self):
1357 """Context manager interface."""
1358 return self
1359
1360 def __exit__(self, _exc_type, _exec_value, _traceback):
1361 """Context manager interface."""
1362 return False
1363
maruel064c0a32016-04-05 11:47:15 -07001364 @property
1365 def added(self):
1366 return self._added[:]
1367
1368 @property
1369 def evicted(self):
1370 return self._evicted[:]
1371
1372 @property
tansell9e04a8d2016-07-28 09:31:59 -07001373 def used(self):
1374 return self._used[:]
1375
1376 @property
maruel064c0a32016-04-05 11:47:15 -07001377 def initial_number_items(self):
1378 return self._initial_number_items
1379
1380 @property
1381 def initial_size(self):
1382 return self._initial_size
1383
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001384 def cached_set(self):
1385 """Returns a set of all cached digests (always a new object)."""
1386 raise NotImplementedError()
1387
maruel36a963d2016-04-08 17:15:49 -07001388 def cleanup(self):
1389 """Deletes any corrupted item from the cache and trims it if necessary."""
1390 raise NotImplementedError()
1391
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001392 def touch(self, digest, size):
1393 """Ensures item is not corrupted and updates its LRU position.
1394
1395 Arguments:
1396 digest: hash digest of item to check.
1397 size: expected size of this item.
1398
1399 Returns:
1400 True if item is in cache and not corrupted.
1401 """
1402 raise NotImplementedError()
1403
1404 def evict(self, digest):
1405 """Removes item from cache if it's there."""
1406 raise NotImplementedError()
1407
tansell9e04a8d2016-07-28 09:31:59 -07001408 def getfileobj(self, digest):
1409 """Returns a readable file like object.
1410
1411 If file exists on the file system it will have a .name attribute with an
1412 absolute path to the file.
1413 """
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001414 raise NotImplementedError()
1415
1416 def write(self, digest, content):
maruel083fa552016-04-08 14:38:01 -07001417 """Reads data from |content| generator and stores it in cache.
1418
1419 Returns digest to simplify chaining.
1420 """
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001421 raise NotImplementedError()
1422
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001423
1424class MemoryCache(LocalCache):
1425 """LocalCache implementation that stores everything in memory."""
1426
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001427 def __init__(self, file_mode_mask=0500):
1428 """Args:
1429 file_mode_mask: bit mask to AND file mode with. Default value will make
1430 all mapped files to be read only.
1431 """
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001432 super(MemoryCache, self).__init__()
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001433 self._file_mode_mask = file_mode_mask
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001434 self._contents = {}
1435
nodirbe642ff2016-06-09 15:51:51 -07001436 def __contains__(self, digest):
1437 with self._lock:
1438 return digest in self._contents
1439
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001440 def cached_set(self):
1441 with self._lock:
1442 return set(self._contents)
1443
maruel36a963d2016-04-08 17:15:49 -07001444 def cleanup(self):
1445 pass
1446
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001447 def touch(self, digest, size):
1448 with self._lock:
1449 return digest in self._contents
1450
1451 def evict(self, digest):
1452 with self._lock:
maruel064c0a32016-04-05 11:47:15 -07001453 v = self._contents.pop(digest, None)
1454 if v is not None:
1455 self._evicted.add(v)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001456
tansell9e04a8d2016-07-28 09:31:59 -07001457 def getfileobj(self, digest):
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001458 with self._lock:
nodir445097b2016-06-03 22:50:26 -07001459 try:
tansell9e04a8d2016-07-28 09:31:59 -07001460 d = self._contents[digest]
nodir445097b2016-06-03 22:50:26 -07001461 except KeyError:
1462 raise CacheMiss(digest)
tansell9e04a8d2016-07-28 09:31:59 -07001463 self._used.append(len(d))
1464 return io.BytesIO(d)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001465
1466 def write(self, digest, content):
1467 # Assemble whole stream before taking the lock.
1468 data = ''.join(content)
1469 with self._lock:
1470 self._contents[digest] = data
maruel064c0a32016-04-05 11:47:15 -07001471 self._added.append(len(data))
maruel083fa552016-04-08 14:38:01 -07001472 return digest
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001473
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001474
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001475class CachePolicies(object):
1476 def __init__(self, max_cache_size, min_free_space, max_items):
1477 """
1478 Arguments:
1479 - max_cache_size: Trim if the cache gets larger than this value. If 0, the
1480 cache is effectively a leak.
1481 - min_free_space: Trim if disk free space becomes lower than this value. If
1482 0, it unconditionally fill the disk.
1483 - max_items: Maximum number of items to keep in the cache. If 0, do not
1484 enforce a limit.
1485 """
1486 self.max_cache_size = max_cache_size
1487 self.min_free_space = min_free_space
1488 self.max_items = max_items
1489
1490
class DiskCache(LocalCache):
  """Stateful LRU cache in a flat hash table in a directory.

  Each item is stored as a single file named after its digest, directly
  inside cache_dir. Saves its state as json file.
  """
  # Name of the json file, inside cache_dir, holding the serialized LRU state.
  STATE_FILE = u'state.json'

  def __init__(self, cache_dir, policies, hash_algo, trim=True):
    """
    Arguments:
      cache_dir: directory where to place the cache.
      policies: cache retention policies.
      algo: hashing algorithm used.
      trim: if True to enforce |policies| right away.
        It can be done later by calling trim() explicitly.
    """
    # All protected methods (starting with '_') except _path should be called
    # with self._lock held.
    super(DiskCache, self).__init__()
    self.cache_dir = cache_dir
    self.policies = policies
    self.hash_algo = hash_algo
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # Items in a LRU lookup dict(digest: size).
    self._lru = lru.LRUDict()
    # Current cached free disk space. It is updated by self._trim().
    file_path.ensure_tree(self.cache_dir)
    self._free_disk = file_path.get_free_space(self.cache_dir)
    # The first item in the LRU cache that must not be evicted during this run
    # since it was referenced. All items more recent than _protected in the LRU
    # cache are also inherently protected. It could be a set() of all items
    # referenced but this increases memory usage without a use case.
    self._protected = None
    # Cleanup operations done by self._load(), if any.
    self._operations = []
    with tools.Profiler('Setup'):
      with self._lock:
        self._load(trim=trim)

  def __contains__(self, digest):
    """True if the digest is tracked in the LRU state."""
    with self._lock:
      return digest in self._lru

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface: trims the cache and logs usage stats."""
    with tools.Profiler('CleanupTrimming'):
      with self._lock:
        self._trim()

      logging.info(
          '%5d (%8dkb) added',
          len(self._added), sum(self._added) / 1024)
      logging.info(
          '%5d (%8dkb) current',
          len(self._lru),
          sum(self._lru.itervalues()) / 1024)
      logging.info(
          '%5d (%8dkb) evicted',
          len(self._evicted), sum(self._evicted) / 1024)
      logging.info(
          ' %8dkb free',
          self._free_disk / 1024)
    return False

  def cached_set(self):
    """Returns a new set of all tracked digests."""
    with self._lock:
      return self._lru.keys_set()

  def cleanup(self):
    """Cleans up the cache directory.

    Ensures there is no unknown files in cache_dir.
    Ensures the read-only bits are set correctly.

    At that point, the cache was already loaded, trimmed to respect cache
    policies.
    """
    fs.chmod(self.cache_dir, 0700)
    # Ensure that all files listed in the state still exist and add new ones.
    previous = self._lru.keys_set()
    # It'd be faster if there were a readdir() function.
    for filename in fs.listdir(self.cache_dir):
      if filename == self.STATE_FILE:
        fs.chmod(os.path.join(self.cache_dir, filename), 0600)
        continue
      if filename in previous:
        fs.chmod(os.path.join(self.cache_dir, filename), 0400)
        previous.remove(filename)
        continue

      # An untracked file. Delete it.
      logging.warning('Removing unknown file %s from cache', filename)
      p = self._path(filename)
      if fs.isdir(p):
        try:
          file_path.rmtree(p)
        except OSError:
          pass
      else:
        file_path.try_remove(p)
      continue

    if previous:
      # Filter out entries that were not found.
      logging.warning('Removed %d lost files', len(previous))
      for filename in previous:
        self._lru.pop(filename)

    # What remains to be done is to hash every single item to
    # detect corruption, then save to ensure state.json is up to date.
    # Sadly, on a 50Gb cache with 100mib/s I/O, this is still over 8 minutes.
    # TODO(maruel): Let's revisit once directory metadata is stored in
    # state.json so only the files that had been mapped since the last cleanup()
    # call are manually verified.
    #
    #with self._lock:
    #  for digest in self._lru:
    #    if not isolated_format.is_valid_hash(
    #        self._path(digest), self.hash_algo):
    #      self.evict(digest)
    #      logging.info('Deleted corrupted item: %s', digest)

  def touch(self, digest, size):
    """Verifies an actual file is valid.

    Note that is doesn't compute the hash so it could still be corrupted if the
    file size didn't change.

    TODO(maruel): More stringent verification while keeping the check fast.
    """
    # Do the check outside the lock.
    if not is_valid_file(self._path(digest), size):
      return False

    # Update it's LRU position.
    with self._lock:
      if digest not in self._lru:
        return False
      self._lru.touch(digest)
      # Mark as referenced so _remove_lru_file(False) will refuse to evict it.
      self._protected = self._protected or digest
    return True

  def evict(self, digest):
    """Removes an item from both the LRU state and the file system."""
    with self._lock:
      # Do not check for 'digest == self._protected' since it could be because
      # the object is corrupted.
      self._lru.pop(digest)
      self._delete_file(digest, UNKNOWN_FILE_SIZE)

  def getfileobj(self, digest):
    """Returns an open file object for the item; raises CacheMiss if absent.

    Records the item's size in self._used for profiling.
    """
    try:
      f = fs.open(self._path(digest), 'rb')
      with self._lock:
        self._used.append(self._lru[digest])
      return f
    except IOError:
      raise CacheMiss(digest)

  def write(self, digest, content):
    """Streams |content| chunks into a read-only file named after |digest|.

    Returns digest to simplify chaining.
    """
    assert content is not None
    with self._lock:
      # Mark as referenced so _remove_lru_file(False) will refuse to evict it.
      self._protected = self._protected or digest
    path = self._path(digest)
    # A stale broken file may remain. It is possible for the file to have write
    # access bit removed which would cause the file_write() call to fail to open
    # in write mode. Take no chance here.
    file_path.try_remove(path)
    try:
      size = file_write(path, content)
    except:
      # There are two possible places were an exception can occur:
      # 1) Inside |content| generator in case of network or unzipping errors.
      # 2) Inside file_write itself in case of disk IO errors.
      # In any case delete an incomplete file and propagate the exception to
      # caller, it will be logged there.
      file_path.try_remove(path)
      raise
    # Make the file read-only in the cache. This has a few side-effects since
    # the file node is modified, so every directory entries to this file becomes
    # read-only. It's fine here because it is a new file.
    file_path.set_read_only(path, True)
    with self._lock:
      self._add(digest, size)
    return digest

  def get_oldest(self):
    """Returns digest of the LRU item or None."""
    try:
      return self._lru.get_oldest()[0]
    except KeyError:
      return None

  def get_timestamp(self, digest):
    """Returns timestamp of last use of an item.

    Raises KeyError if item is not found.
    """
    return self._lru.get_timestamp(digest)

  def trim(self):
    """Forces retention policies."""
    with self._lock:
      self._trim()

  def _load(self, trim):
    """Loads state of the cache from json file.

    If cache_dir does not exist on disk, it is created.
    """
    self._lock.assert_locked()

    if not fs.isfile(self.state_file):
      # NOTE(review): uses os.path.isdir while the rest of the class goes
      # through the fs wrapper; presumably equivalent here — confirm.
      if not os.path.isdir(self.cache_dir):
        fs.makedirs(self.cache_dir)
    else:
      # Load state of the cache.
      try:
        self._lru = lru.LRUDict.load(self.state_file)
      except ValueError as err:
        logging.error('Failed to load cache state: %s' % (err,))
        # Don't want to keep broken state file.
        file_path.try_remove(self.state_file)
    if trim:
      self._trim()
    # We want the initial cache size after trimming, i.e. what is readily
    # available.
    self._initial_number_items = len(self._lru)
    self._initial_size = sum(self._lru.itervalues())
    if self._evicted:
      logging.info(
          'Trimming evicted items with the following sizes: %s',
          sorted(self._evicted))

  def _save(self):
    """Saves the LRU ordering."""
    self._lock.assert_locked()
    if sys.platform != 'win32':
      d = os.path.dirname(self.state_file)
      if fs.isdir(d):
        # Necessary otherwise the file can't be created.
        file_path.set_read_only(d, False)
    if fs.isfile(self.state_file):
      file_path.set_read_only(self.state_file, False)
    self._lru.save(self.state_file)

  def _trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    self._lock.assert_locked()

    # Ensure maximum cache size.
    if self.policies.max_cache_size:
      total_size = sum(self._lru.itervalues())
      while total_size > self.policies.max_cache_size:
        total_size -= self._remove_lru_file(True)

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and len(self._lru) > self.policies.max_items:
      for _ in xrange(len(self._lru) - self.policies.max_items):
        self._remove_lru_file(True)

    # Ensure enough free space.
    self._free_disk = file_path.get_free_space(self.cache_dir)
    trimmed_due_to_space = 0
    while (
        self.policies.min_free_space and
        self._lru and
        self._free_disk < self.policies.min_free_space):
      trimmed_due_to_space += 1
      self._remove_lru_file(True)

    if trimmed_due_to_space:
      total_usage = sum(self._lru.itervalues())
      usage_percent = 0.
      if total_usage:
        usage_percent = 100. * float(total_usage) / self.policies.max_cache_size

      logging.warning(
          'Trimmed %s file(s) due to not enough free disk space: %.1fkb free,'
          ' %.1fkb cache (%.1f%% of its maximum capacity of %.1fkb)',
          trimmed_due_to_space,
          self._free_disk / 1024.,
          total_usage / 1024.,
          usage_percent,
          self.policies.max_cache_size / 1024.)
    self._save()

  def _path(self, digest):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, digest)

  def _remove_lru_file(self, allow_protected):
    """Removes the least recently used file and returns its size.

    Raises Error when nothing can be removed, or when the candidate is the
    protected item and allow_protected is False.
    """
    self._lock.assert_locked()
    try:
      digest, (size, _) = self._lru.get_oldest()
      if not allow_protected and digest == self._protected:
        raise Error('Not enough space to map the whole isolated tree')
    except KeyError:
      raise Error('Nothing to remove')
    digest, (size, _) = self._lru.pop_oldest()
    logging.debug("Removing LRU file %s", digest)
    self._delete_file(digest, size)
    return size

  def _add(self, digest, size=UNKNOWN_FILE_SIZE):
    """Adds an item into LRU cache marking it as a newest one."""
    self._lock.assert_locked()
    if size == UNKNOWN_FILE_SIZE:
      size = fs.stat(self._path(digest)).st_size
    self._added.append(size)
    self._lru.add(digest, size)
    self._free_disk -= size
    # Do a quicker version of self._trim(). It only enforces free disk space,
    # not cache size limits. It doesn't actually look at real free disk space,
    # only uses its cache values. self._trim() will be called later to enforce
    # real trimming but doing this quick version here makes it possible to map
    # an isolated that is larger than the current amount of free disk space when
    # the cache size is already large.
    while (
        self.policies.min_free_space and
        self._lru and
        self._free_disk < self.policies.min_free_space):
      self._remove_lru_file(False)

  def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
    """Deletes cache file from the file system."""
    self._lock.assert_locked()
    try:
      if size == UNKNOWN_FILE_SIZE:
        size = fs.stat(self._path(digest)).st_size
      file_path.try_remove(self._path(digest))
      self._evicted.append(size)
      self._free_disk += size
    except OSError as e:
      logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
1828
1829
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001830class IsolatedBundle(object):
1831 """Fetched and parsed .isolated file with all dependencies."""
1832
Vadim Shtayura3148e072014-09-02 18:51:52 -07001833 def __init__(self):
1834 self.command = []
1835 self.files = {}
1836 self.read_only = None
1837 self.relative_cwd = None
1838 # The main .isolated file, a IsolatedFile instance.
1839 self.root = None
1840
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001841 def fetch(self, fetch_queue, root_isolated_hash, algo):
1842 """Fetches the .isolated and all the included .isolated.
Vadim Shtayura3148e072014-09-02 18:51:52 -07001843
1844 It enables support for "included" .isolated files. They are processed in
1845 strict order but fetched asynchronously from the cache. This is important so
1846 that a file in an included .isolated file that is overridden by an embedding
1847 .isolated file is not fetched needlessly. The includes are fetched in one
1848 pass and the files are fetched as soon as all the ones on the left-side
1849 of the tree were fetched.
1850
1851 The prioritization is very important here for nested .isolated files.
1852 'includes' have the highest priority and the algorithm is optimized for both
1853 deep and wide trees. A deep one is a long link of .isolated files referenced
1854 one at a time by one item in 'includes'. A wide one has a large number of
1855 'includes' in a single .isolated file. 'left' is defined as an included
1856 .isolated file earlier in the 'includes' list. So the order of the elements
1857 in 'includes' is important.
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001858
1859 As a side effect this method starts asynchronous fetch of all data files
1860 by adding them to |fetch_queue|. It doesn't wait for data files to finish
1861 fetching though.
Vadim Shtayura3148e072014-09-02 18:51:52 -07001862 """
1863 self.root = isolated_format.IsolatedFile(root_isolated_hash, algo)
1864
1865 # Isolated files being retrieved now: hash -> IsolatedFile instance.
1866 pending = {}
1867 # Set of hashes of already retrieved items to refuse recursive includes.
1868 seen = set()
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001869 # Set of IsolatedFile's whose data files have already being fetched.
1870 processed = set()
Vadim Shtayura3148e072014-09-02 18:51:52 -07001871
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001872 def retrieve_async(isolated_file):
Vadim Shtayura3148e072014-09-02 18:51:52 -07001873 h = isolated_file.obj_hash
1874 if h in seen:
1875 raise isolated_format.IsolatedError(
1876 'IsolatedFile %s is retrieved recursively' % h)
1877 assert h not in pending
1878 seen.add(h)
1879 pending[h] = isolated_file
1880 fetch_queue.add(h, priority=threading_utils.PRIORITY_HIGH)
1881
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001882 # Start fetching root *.isolated file (single file, not the whole bundle).
1883 retrieve_async(self.root)
Vadim Shtayura3148e072014-09-02 18:51:52 -07001884
1885 while pending:
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001886 # Wait until some *.isolated file is fetched, parse it.
Vadim Shtayura3148e072014-09-02 18:51:52 -07001887 item_hash = fetch_queue.wait(pending)
1888 item = pending.pop(item_hash)
tansell9e04a8d2016-07-28 09:31:59 -07001889 with fetch_queue.cache.getfileobj(item_hash) as f:
1890 item.load(f.read())
Vadim Shtayura3148e072014-09-02 18:51:52 -07001891
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001892 # Start fetching included *.isolated files.
Vadim Shtayura3148e072014-09-02 18:51:52 -07001893 for new_child in item.children:
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001894 retrieve_async(new_child)
Vadim Shtayura3148e072014-09-02 18:51:52 -07001895
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001896 # Always fetch *.isolated files in traversal order, waiting if necessary
1897 # until next to-be-processed node loads. "Waiting" is done by yielding
1898 # back to the outer loop, that waits until some *.isolated is loaded.
1899 for node in isolated_format.walk_includes(self.root):
1900 if node not in processed:
1901 # Not visited, and not yet loaded -> wait for it to load.
1902 if not node.is_loaded:
1903 break
1904 # Not visited and loaded -> process it and continue the traversal.
1905 self._start_fetching_files(node, fetch_queue)
1906 processed.add(node)
Vadim Shtayura3148e072014-09-02 18:51:52 -07001907
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001908 # All *.isolated files should be processed by now and only them.
1909 all_isolateds = set(isolated_format.walk_includes(self.root))
1910 assert all_isolateds == processed, (all_isolateds, processed)
Vadim Shtayura3148e072014-09-02 18:51:52 -07001911
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001912 # Extract 'command' and other bundle properties.
1913 for node in isolated_format.walk_includes(self.root):
1914 self._update_self(node)
Vadim Shtayura3148e072014-09-02 18:51:52 -07001915 self.relative_cwd = self.relative_cwd or ''
1916
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001917 def _start_fetching_files(self, isolated, fetch_queue):
1918 """Starts fetching files from |isolated| that are not yet being fetched.
Vadim Shtayura3148e072014-09-02 18:51:52 -07001919
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001920 Modifies self.files.
1921 """
1922 logging.debug('fetch_files(%s)', isolated.obj_hash)
1923 for filepath, properties in isolated.data.get('files', {}).iteritems():
1924 # Root isolated has priority on the files being mapped. In particular,
1925 # overridden files must not be fetched.
1926 if filepath not in self.files:
1927 self.files[filepath] = properties
tansell9e04a8d2016-07-28 09:31:59 -07001928
1929 # Make sure if the isolated is read only, the mode doesn't have write
1930 # bits.
1931 if 'm' in properties and self.read_only:
1932 properties['m'] &= ~(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH)
1933
1934 # Preemptively request hashed files.
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001935 if 'h' in properties:
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07001936 logging.debug('fetching %s', filepath)
1937 fetch_queue.add(
1938 properties['h'], properties['s'], threading_utils.PRIORITY_MED)
1939
1940 def _update_self(self, node):
1941 """Extracts bundle global parameters from loaded *.isolated file.
1942
1943 Will be called with each loaded *.isolated file in order of traversal of
1944 isolated include graph (see isolated_format.walk_includes).
1945 """
Vadim Shtayura3148e072014-09-02 18:51:52 -07001946 # Grabs properties.
1947 if not self.command and node.data.get('command'):
1948 # Ensure paths are correctly separated on windows.
1949 self.command = node.data['command']
1950 if self.command:
1951 self.command[0] = self.command[0].replace('/', os.path.sep)
1952 self.command = tools.fix_python_path(self.command)
1953 if self.read_only is None and node.data.get('read_only') is not None:
1954 self.read_only = node.data['read_only']
1955 if (self.relative_cwd is None and
1956 node.data.get('relative_cwd') is not None):
1957 self.relative_cwd = node.data['relative_cwd']
1958
1959
Vadim Shtayura8623c272014-12-01 11:45:27 -08001960def set_storage_api_class(cls):
1961 """Replaces StorageApi implementation used by default."""
1962 global _storage_api_cls
1963 assert _storage_api_cls is None
1964 assert issubclass(cls, StorageApi)
1965 _storage_api_cls = cls
1966
1967
Marc-Antoine Ruelb10edf22014-12-11 13:33:57 -05001968def get_storage_api(url, namespace):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001969 """Returns an object that implements low-level StorageApi interface.
1970
1971 It is used by Storage to work with single isolate |namespace|. It should
1972 rarely be used directly by clients, see 'get_storage' for
1973 a better alternative.
1974
1975 Arguments:
Marc-Antoine Ruelb10edf22014-12-11 13:33:57 -05001976 url: URL of isolate service to use shared cloud based storage.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001977 namespace: isolate namespace to operate in, also defines hashing and
1978 compression scheme used, i.e. namespace names that end with '-gzip'
1979 store compressed data.
1980
1981 Returns:
1982 Instance of StorageApi subclass.
1983 """
Marc-Antoine Ruelb10edf22014-12-11 13:33:57 -05001984 cls = _storage_api_cls or IsolateServer
1985 return cls(url, namespace)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001986
1987
def get_storage(url, namespace):
  """Builds a Storage that can upload to and download from |namespace|.

  Arguments:
    url: URL of isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of Storage.
  """
  api = get_storage_api(url, namespace)
  return Storage(api)
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00002001
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00002002
def upload_tree(base_url, infiles, namespace):
  """Uploads the given tree to the given url.

  Arguments:
    base_url: The url of the isolate server to upload to.
    infiles: iterable of pairs (absolute path, metadata dict) of files.
    namespace: The namespace to use on the server.
  """
  # Build FileItem objects out of |infiles|, dropping duplicate paths.
  # Symlinks ('l' key in the metadata) are filtered out as well, since they
  # are not represented by items on the isolate server side.
  to_upload = []
  known_paths = set()
  dropped = 0
  for filepath, metadata in infiles:
    assert isinstance(filepath, unicode), filepath
    if 'l' in metadata or filepath in known_paths:
      dropped += 1
      continue
    known_paths.add(filepath)
    to_upload.append(
        FileItem(
            path=filepath,
            digest=metadata['h'],
            size=metadata['s'],
            high_priority=metadata.get('priority') == '0'))

  logging.info('Skipped %d duplicated entries', dropped)
  with get_storage(base_url, namespace) as storage:
    return storage.upload_items(to_upload)
Vadim Shtayura3148e072014-09-02 18:51:52 -07002033
2034
maruel4409e302016-07-19 14:25:51 -07002035def fetch_isolated(isolated_hash, storage, cache, outdir, use_symlinks):
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002036 """Aggressively downloads the .isolated file(s), then download all the files.
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002037
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002038 Arguments:
2039 isolated_hash: hash of the root *.isolated file.
2040 storage: Storage class that communicates with isolate storage.
2041 cache: LocalCache class that knows how to store and map files locally.
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002042 outdir: Output directory to map file tree to.
maruel4409e302016-07-19 14:25:51 -07002043 use_symlinks: Use symlinks instead of hardlinks when True.
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002044
2045 Returns:
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07002046 IsolatedBundle object that holds details about loaded *.isolated file.
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002047 """
Marc-Antoine Ruel4e8cd182014-06-18 13:27:17 -04002048 logging.debug(
maruel4409e302016-07-19 14:25:51 -07002049 'fetch_isolated(%s, %s, %s, %s, %s)',
2050 isolated_hash, storage, cache, outdir, use_symlinks)
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002051 # Hash algorithm to use, defined by namespace |storage| is using.
2052 algo = storage.hash_algo
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002053 with cache:
2054 fetch_queue = FetchQueue(storage, cache)
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07002055 bundle = IsolatedBundle()
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002056
2057 with tools.Profiler('GetIsolateds'):
2058 # Optionally support local files by manually adding them to cache.
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04002059 if not isolated_format.is_valid_hash(isolated_hash, algo):
Marc-Antoine Ruel4e8cd182014-06-18 13:27:17 -04002060 logging.debug('%s is not a valid hash, assuming a file', isolated_hash)
maruel1ceb3872015-10-14 06:10:44 -07002061 path = unicode(os.path.abspath(isolated_hash))
Marc-Antoine Ruel4e8cd182014-06-18 13:27:17 -04002062 try:
maruel1ceb3872015-10-14 06:10:44 -07002063 isolated_hash = fetch_queue.inject_local_file(path, algo)
Marc-Antoine Ruel4e8cd182014-06-18 13:27:17 -04002064 except IOError:
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04002065 raise isolated_format.MappingError(
Marc-Antoine Ruel4e8cd182014-06-18 13:27:17 -04002066 '%s doesn\'t seem to be a valid file. Did you intent to pass a '
2067 'valid hash?' % isolated_hash)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002068
2069 # Load all *.isolated and start loading rest of the files.
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07002070 bundle.fetch(fetch_queue, isolated_hash, algo)
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002071
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002072 with tools.Profiler('GetRest'):
2073 # Create file system hierarchy.
nodire5028a92016-04-29 14:38:21 -07002074 file_path.ensure_tree(outdir)
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07002075 create_directories(outdir, bundle.files)
2076 create_symlinks(outdir, bundle.files.iteritems())
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002077
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002078 # Ensure working directory exists.
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07002079 cwd = os.path.normpath(os.path.join(outdir, bundle.relative_cwd))
nodire5028a92016-04-29 14:38:21 -07002080 file_path.ensure_tree(cwd)
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002081
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002082 # Multimap: digest -> list of pairs (path, props).
2083 remaining = {}
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07002084 for filepath, props in bundle.files.iteritems():
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002085 if 'h' in props:
2086 remaining.setdefault(props['h'], []).append((filepath, props))
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002087
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002088 # Now block on the remaining files to be downloaded and mapped.
2089 logging.info('Retrieving remaining files (%d of them)...',
2090 fetch_queue.pending_count)
2091 last_update = time.time()
Vadim Shtayura3148e072014-09-02 18:51:52 -07002092 with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002093 while remaining:
2094 detector.ping()
2095
2096 # Wait for any item to finish fetching to cache.
2097 digest = fetch_queue.wait(remaining)
2098
tansell9e04a8d2016-07-28 09:31:59 -07002099 # Create the files in the destination using item in cache as the
2100 # source.
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002101 for filepath, props in remaining.pop(digest):
tansell9e04a8d2016-07-28 09:31:59 -07002102 fullpath = os.path.join(outdir, filepath)
2103
2104 with cache.getfileobj(digest) as srcfileobj:
tanselle4288c32016-07-28 09:45:40 -07002105 filetype = props.get('t', 'basic')
2106
2107 if filetype == 'basic':
2108 file_mode = props.get('m')
2109 if file_mode:
2110 # Ignore all bits apart from the user
2111 file_mode &= 0700
2112 putfile(
2113 srcfileobj, fullpath, file_mode,
2114 use_symlink=use_symlinks)
2115
tansell26de79e2016-11-13 18:41:11 -08002116 elif filetype == 'tar':
2117 basedir = os.path.dirname(fullpath)
2118 with tarfile.TarFile(fileobj=srcfileobj) as extractor:
2119 for ti in extractor:
2120 if not ti.isfile():
2121 logging.warning(
2122 'Path(%r) is nonfile (%s), skipped',
2123 ti.name, ti.type)
2124 continue
2125 fp = os.path.normpath(os.path.join(basedir, ti.name))
2126 if not fp.startswith(basedir):
2127 logging.error(
2128 'Path(%r) is outside root directory',
2129 fp)
2130 ifd = extractor.extractfile(ti)
2131 file_path.ensure_tree(os.path.dirname(fp))
2132 putfile(ifd, fp, 0700, ti.size)
2133
tanselle4288c32016-07-28 09:45:40 -07002134 elif filetype == 'ar':
2135 basedir = os.path.dirname(fullpath)
2136 extractor = arfile.ArFileReader(srcfileobj, fullparse=False)
2137 for ai, ifd in extractor:
2138 fp = os.path.normpath(os.path.join(basedir, ai.name))
tansell26de79e2016-11-13 18:41:11 -08002139 if not fp.startswith(basedir):
2140 logging.error(
2141 'Path(%r) is outside root directory',
2142 fp)
tanselle4288c32016-07-28 09:45:40 -07002143 file_path.ensure_tree(os.path.dirname(fp))
2144 putfile(ifd, fp, 0700, ai.size)
2145
2146 else:
2147 raise isolated_format.IsolatedError(
2148 'Unknown file type %r', filetype)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002149
2150 # Report progress.
2151 duration = time.time() - last_update
2152 if duration > DELAY_BETWEEN_UPDATES_IN_SECS:
2153 msg = '%d files remaining...' % len(remaining)
2154 print msg
2155 logging.info(msg)
2156 last_update = time.time()
2157
2158 # Cache could evict some items we just tried to fetch, it's a fatal error.
2159 if not fetch_queue.verify_all_cached():
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04002160 raise isolated_format.MappingError(
2161 'Cache is too small to hold all requested files')
Vadim Shtayura7f7459c2014-09-04 13:25:10 -07002162 return bundle
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002163
2164
def directory_to_metadata(root, algo, blacklist):
  """Returns the FileItem list and .isolated metadata for a directory."""
  root = file_path.get_native_path_case(root)
  relpaths = isolated_format.expand_directory_and_symlink(
      root, '.' + os.path.sep, blacklist, sys.platform != 'win32')
  metadata = {}
  for relpath in relpaths:
    meta = isolated_format.file_to_metadata(
        os.path.join(root, relpath), {}, 0, algo)
    # Strip the 't' entry (presumably the timestamp) — it is not wanted in
    # the resulting .isolated metadata.
    meta.pop('t')
    metadata[relpath] = meta
  items = []
  for relpath, meta in metadata.iteritems():
    if 'h' not in meta:
      # Entries without a digest (e.g. symlinks) are represented by metadata
      # only, not by uploadable items.
      continue
    items.append(
        FileItem(
            path=os.path.join(root, relpath),
            digest=meta['h'],
            size=meta['s'],
            high_priority=relpath.endswith('.isolated')))
  return items, metadata
2186
2187
def archive_files_to_storage(storage, files, blacklist):
  """Stores every entries and returns the relevant data.

  Arguments:
    storage: a Storage object that communicates with the remote object store.
    files: list of file paths to upload. If a directory is specified, a
        .isolated file is created and its hash is returned.
    blacklist: function that returns True if a file should be omitted.

  Returns:
    tuple(list(tuple(hash, path)), list(FileItem cold), list(FileItem hot)).
    The first file in the first item is always the isolated file.

  Raises:
    Error: on duplicate input paths, on a path that is neither a file nor a
        directory, or when an OSError occurs while processing a path.
  """
  assert all(isinstance(i, unicode) for i in files), files
  # Compare on absolute paths so 'a' and './a' count as the same entry.
  if len(files) != len(set(map(os.path.abspath, files))):
    raise Error('Duplicate entries found.')

  # List of tuple(hash, path).
  results = []
  # The temporary directory is only created as needed.
  tempdir = None
  try:
    # TODO(maruel): Yield the files to a worker thread.
    items_to_upload = []
    for f in files:
      try:
        filepath = os.path.abspath(f)
        if fs.isdir(filepath):
          # Uploading a whole directory.
          items, metadata = directory_to_metadata(
              filepath, storage.hash_algo, blacklist)

          # Create the .isolated file.
          if not tempdir:
            tempdir = tempfile.mkdtemp(prefix=u'isolateserver')
          handle, isolated = tempfile.mkstemp(dir=tempdir, suffix=u'.isolated')
          # mkstemp opened the file; only the path is needed from here on.
          os.close(handle)
          data = {
              'algo':
                  isolated_format.SUPPORTED_ALGOS_REVERSE[storage.hash_algo],
              'files': metadata,
              'version': isolated_format.ISOLATED_FILE_VERSION,
          }
          isolated_format.save_isolated(isolated, data)
          h = isolated_format.hash_file(isolated, storage.hash_algo)
          items_to_upload.extend(items)
          # The generated .isolated is uploaded too, at high priority.
          items_to_upload.append(
              FileItem(
                  path=isolated,
                  digest=h,
                  size=fs.stat(isolated).st_size,
                  high_priority=True))
          # The directory is reported via the hash of its .isolated file.
          results.append((h, f))

        elif fs.isfile(filepath):
          h = isolated_format.hash_file(filepath, storage.hash_algo)
          items_to_upload.append(
              FileItem(
                  path=filepath,
                  digest=h,
                  size=fs.stat(filepath).st_size,
                  high_priority=f.endswith('.isolated')))
          results.append((h, f))
        else:
          raise Error('%s is neither a file or directory.' % f)
      except OSError:
        raise Error('Failed to process %s.' % f)
    uploaded = storage.upload_items(items_to_upload)
    # NOTE(review): 'cold' = items present in |uploaded| (presumably the ones
    # that actually had to be transferred), 'hot' = the rest; relies on list
    # membership, O(n^2) for large trees — confirm before optimizing.
    cold = [i for i in items_to_upload if i in uploaded]
    hot = [i for i in items_to_upload if i not in uploaded]
    return results, cold, hot
  finally:
    # Always delete the scratch directory holding generated .isolated files.
    if tempdir and fs.isdir(tempdir):
      file_path.rmtree(tempdir)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002262
2263
def archive(out, namespace, files, blacklist):
  """Archives |files| to the isolate server and prints 'hash path' lines.

  Arguments:
    out: URL of the isolate server.
    namespace: namespace to upload into.
    files: list of paths to upload; ['-'] means "read the list from stdin".
    blacklist: list of regexps used to skip files when walking directories.
  """
  if files == ['-']:
    files = sys.stdin.readlines()

  if not files:
    raise Error('Nothing to upload')

  files = [f.decode('utf-8') for f in files]
  blacklist = tools.gen_blacklist(blacklist)
  with get_storage(out, namespace) as storage:
    # Only the (hash, path) pairs matter here; drop the cold/hot statistics.
    results = archive_files_to_storage(storage, files, blacklist)[0]
  print('\n'.join('%s %s' % (h, f) for h, f in results))
2277
2278
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server.

  If a directory is specified, a .isolated file is created and the whole
  directory is uploaded. Then this .isolated file can be included in another
  one to run commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  # Registers --isolate-server/--namespace and --blacklist flags.
  add_isolate_server_options(parser)
  add_archive_options(parser)
  options, files = parser.parse_args(args)
  # Normalizes the server URL, installs error reporting and authenticates.
  process_isolate_server_options(parser, options, True, True)
  try:
    archive(options.isolate_server, options.namespace, files, options.blacklist)
  except Error as e:
    # Surface archival failures as a normal command-line error.
    parser.error(e.args[0])
  return 0
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002300
2301
def CMDdownload(parser, args):
  """Download data from the server.

  It can either download individual files or a complete tree from a .isolated
  file.
  """
  add_isolate_server_options(parser)
  parser.add_option(
      '-s', '--isolated', metavar='HASH',
      help='hash of an isolated file, .isolated file content is discarded, use '
           '--file if you need it')
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default='download',
      help='destination directory')
  parser.add_option(
      '--use-symlinks', action='store_true',
      help='Use symlinks instead of hardlinks')
  add_cache_options(parser)
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported arguments: %s' % args)

  process_isolate_server_options(parser, options, True, True)
  # Exactly one of --isolated / --file must be given.
  if bool(options.isolated) == bool(options.file):
    parser.error('Use one of --isolated or --file, and only one.')
  if not options.cache and options.use_symlinks:
    parser.error('--use-symlinks require the use of a cache with --cache')

  cache = process_cache_options(options)
  cache.cleanup()
  options.target = unicode(os.path.abspath(options.target))
  if options.isolated:
    # Mapping a tree requires a fresh (empty or missing) target directory.
    if (fs.isfile(options.target) or
        (fs.isdir(options.target) and fs.listdir(options.target))):
      parser.error(
          '--target \'%s\' exists, please use another target' % options.target)
  with get_storage(options.isolate_server, options.namespace) as storage:
    # Fetching individual files.
    if options.file:
      # TODO(maruel): Enable cache in this case too.
      channel = threading_utils.TaskChannel()
      pending = {}
      for digest, dest in options.file:
        pending[digest] = dest
        storage.async_fetch(
            channel,
            threading_utils.PRIORITY_MED,
            digest,
            UNKNOWN_FILE_SIZE,
            functools.partial(file_write, os.path.join(options.target, dest)))
      # Drain the channel until every requested digest has been written out.
      while pending:
        fetched = channel.pull()
        dest = pending.pop(fetched)
        logging.info('%s: %s', fetched, dest)

    # Fetching whole isolated tree.
    if options.isolated:
      with cache:
        bundle = fetch_isolated(
            isolated_hash=options.isolated,
            storage=storage,
            cache=cache,
            outdir=options.target,
            use_symlinks=options.use_symlinks)
      if bundle.command:
        # NOTE(review): |rel| already contains options.target, so the join
        # below is a no-op for the absolute target path — confirm before
        # simplifying to just |rel|.
        rel = os.path.join(options.target, bundle.relative_cwd)
        print('To run this test please run from the directory %s:' %
              os.path.join(options.target, rel))
        print(' ' + ' '.join(bundle.command))

  return 0
2376
2377
def add_archive_options(parser):
  """Adds the upload-side --blacklist option to |parser|."""
  parser.add_option(
      '--blacklist',
      default=list(DEFAULT_BLACKLIST),
      action='append',
      help='List of regexp to use as blacklist filter when uploading '
           'directories')
2384
2385
def add_isolate_server_options(parser):
  """Adds --isolate-server and --namespace options to parser.

  --isolate-server defaults to the ISOLATE_SERVER environment variable when it
  is set; --namespace defaults to 'default-gzip'.
  """
  default_server = os.environ.get('ISOLATE_SERVER', '')
  parser.add_option(
      '-I', '--isolate-server',
      metavar='URL', default=default_server,
      help='URL of the Isolate Server to use. Defaults to the environment '
           'variable ISOLATE_SERVER if set. No need to specify https://, this '
           'is assumed.')
  parser.add_option(
      '--namespace', default='default-gzip',
      help='The namespace to use on the Isolate Server, default: %default')
2397
2398
def process_isolate_server_options(
    parser, options, set_exception_handler, required):
  """Processes the --isolate-server option.

  Normalizes the server URL in-place, optionally installs the crash
  reporting handler and ensures the user is authenticated.

  Returns the identity as determined by the server.
  """
  if not options.isolate_server:
    # Nothing to validate; fail only when the option is mandatory.
    if required:
      parser.error('--isolate-server is required.')
    return

  try:
    options.isolate_server = net.fix_url(options.isolate_server)
  except ValueError as e:
    parser.error('--isolate-server %s' % e)

  if set_exception_handler:
    on_error.report_on_exception_exit(options.isolate_server)

  try:
    return auth.ensure_logged_in(options.isolate_server)
  except ValueError as e:
    parser.error(str(e))
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002420
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002421
def add_cache_options(parser):
  """Adds the local cache tuning options to |parser| in their own group."""
  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache', metavar='DIR',
      help='Directory to keep a local cache of the files. Accelerates download '
           'by reusing already downloaded files. Default=%default')
  # Numeric knobs controlling when the cache gets trimmed.
  for flag, default, helptext in (
      ('--max-cache-size',
       50*1024*1024*1024,
       'Trim if the cache gets larger than this value, default=%default'),
      ('--min-free-space',
       2*1024*1024*1024,
       'Trim if disk free space becomes lower than this value, '
       'default=%default'),
      ('--max-items',
       100000,
       'Trim if more than this number of items are in the cache '
       'default=%default')):
    cache_group.add_option(
        flag, type='int', metavar='NNN', default=default, help=helptext)
  parser.add_option_group(cache_group)
2449
2450
def process_cache_options(options, trim=True):
  """Returns the content cache selected by the parsed cache options.

  A DiskCache rooted at --cache when one was requested, otherwise a purely
  in-memory MemoryCache.
  """
  if not options.cache:
    return MemoryCache()

  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)
  # |options.cache| path may not exist until DiskCache() instance is created.
  return DiskCache(
      unicode(os.path.abspath(options.cache)),
      policies,
      isolated_format.get_hash_algo(options.namespace),
      trim=trim)
2464
2465
class OptionParserIsolateServer(logging_utils.OptionParserWithLogging):
  """Option parser with logging, version and authentication flags baked in."""

  def __init__(self, **kwargs):
    # Explicit base call: optparse-based parsers are old-style classes on
    # python 2, so super() is not used here.
    logging_utils.OptionParserWithLogging.__init__(
        self,
        version=__version__,
        prog=os.path.basename(sys.modules[__name__].__file__),
        **kwargs)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    """Parses arguments, then applies the authentication options."""
    options, args = logging_utils.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    auth.process_auth_options(self, options)
    return options, args
2480
2481
2482def main(args):
2483 dispatcher = subcommand.CommandDispatcher(__name__)
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -04002484 return dispatcher.execute(OptionParserIsolateServer(), args)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00002485
2486
2487if __name__ == '__main__':
maruel8e4e40c2016-05-30 06:21:07 -07002488 subprocess42.inhibit_os_error_reporting()
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002489 fix_encoding.fix_encoding()
2490 tools.disable_buffering()
2491 colorama.init()
maruel4409e302016-07-19 14:25:51 -07002492 file_path.enable_symlink()
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +00002493 sys.exit(main(sys.argv[1:]))