blob: b9cf7a70ac34dec9390d561f07795665d678b6b9 [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
maruelea586f32016-04-05 11:11:33 -07002# Copyright 2013 The LUCI Authors. All rights reserved.
3# Use of this source code is governed by the Apache v2.0 license that can be
4# found in the LICENSE file.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00005
Marc-Antoine Ruel52436aa2014-08-28 21:57:57 -04006"""Archives a set of files or directories to an Isolate Server."""
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00007
maruel12e30012015-10-09 11:55:35 -07008__version__ = '0.4.5'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
Cory Massarocc19c8c2015-03-10 13:35:11 -070010import base64
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +000011import functools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000012import logging
Marc-Antoine Ruela57d7db2014-10-15 20:31:19 -040013import optparse
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000014import os
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000015import re
Vadim Shtayuraf9e401b2014-10-15 18:19:37 +040016import signal
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000017import sys
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050018import tempfile
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000019import threading
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000020import time
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -050021import types
maruel@chromium.orge82112e2013-04-24 14:41:55 +000022import urllib
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -050023import urlparse
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000024import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000025
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000026from third_party import colorama
27from third_party.depot_tools import fix_encoding
28from third_party.depot_tools import subcommand
29
Marc-Antoine Ruel37989932013-11-19 16:28:08 -050030from utils import file_path
maruel12e30012015-10-09 11:55:35 -070031from utils import fs
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040032from utils import logging_utils
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -040033from utils import lru
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000034from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040035from utils import on_error
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000036from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000037from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000038
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080039import auth
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040040import isolated_format
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080041
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000042
# Version of isolate protocol passed to the server in /handshake request.
ISOLATE_PROTOCOL_VERSION = '1.0'


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# Maximum expected delay (in seconds) between successive file fetches or uploads
# in Storage. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# The number of files to check the isolate server per /pre-upload query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: larger the file -> larger the
# possibility it has changed). Then first ITEMS_PER_CONTAINS_QUERIES[0] files
# are taken and send to '/pre-upload', then next ITEMS_PER_CONTAINS_QUERIES[1],
# and so on. Numbers here are a trade-off; the more per request, the lower the
# effect of HTTP round trip latency and TCP-level chattiness. On the other hand,
# larger values cause longer lookups, increasing the initial latency to start
# uploading, which is especially an issue for large files. This value is
# optimized for the "few thousands files to look up with minimal number of large
# files missing" case.
ITEMS_PER_CONTAINS_QUERIES = (20, 20, 50, 50, 50, 100)


# A list of already compressed extension types that should not receive any
# compression before being uploaded.
# NOTE(review): entries are bare extensions without a leading dot; any code
# matching against this list must strip the dot that os.path.splitext() keeps.
ALREADY_COMPRESSED_TYPES = [
    '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'mp4', 'pdf',
    'png', 'wav', 'zip',
]


# Chunk size to use when reading from network stream.
NET_IO_FILE_CHUNK = 16 * 1024


# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout whole download will be aborted.
DOWNLOAD_READ_TIMEOUT = 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30


# Regexps of file paths that are not archived by default.
DEFAULT_BLACKLIST = (
  # Temporary vim or python files.
  r'^.+\.(?:pyc|swp)$',
  # .git or .svn directory.
  r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
)


# A class to use to communicate with the server by default. Can be changed by
# 'set_storage_api_class'. Default is IsolateServer.
_storage_api_cls = None
106
107
class Error(Exception):
  """Generic runtime error."""
111
112
class IsolatedErrorNoCommand(isolated_format.IsolatedError):
  """Signals an early abort due to lack of command specified.

  Subclasses isolated_format.IsolatedError so callers that already catch
  IsolatedError handle this condition too.
  """
  pass
116
117
class Aborted(Error):
  """Operation aborted.

  Raised by Storage worker tasks once Storage.abort() has been called, e.g.
  from the SIGINT/SIGTERM handlers installed by Storage.__enter__().
  """
  pass
121
122
def file_read(path, chunk_size=isolated_format.DISK_FILE_CHUNK, offset=0):
  """Yields file content in chunks of |chunk_size| starting from |offset|."""
  with fs.open(path, 'rb') as stream:
    if offset:
      stream.seek(offset)
    # Keep reading until a read returns an empty string (EOF).
    for block in iter(lambda: stream.read(chunk_size), b''):
      yield block
133
134
def file_write(path, content_generator):
  """Writes file content as generated by content_generator.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  parent_dir = os.path.dirname(path)
  if not fs.isdir(parent_dir):
    fs.makedirs(parent_dir)
  written = 0
  with fs.open(path, 'wb') as out:
    for chunk in content_generator:
      out.write(chunk)
      written += len(chunk)
  return written
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000153
154
def zip_compress(content_generator, level=7):
  """Reads chunks from |content_generator| and yields zip compressed chunks."""
  deflater = zlib.compressobj(level)
  for piece in content_generator:
    packed = deflater.compress(piece)
    # The compressor may buffer input; only yield when it emits output.
    if packed:
      yield packed
  trailing = deflater.flush(zlib.Z_FINISH)
  if trailing:
    yield trailing
165
166
def zip_decompress(
    content_generator, chunk_size=isolated_format.DISK_FILE_CHUNK):
  """Reads zipped data from |content_generator| and yields decompressed data.

  Decompresses data in small chunks (no larger than |chunk_size|) so that
  zip bomb file doesn't cause zlib to preallocate huge amount of memory.

  Raises IOError if data is corrupted or incomplete.
  """
  inflater = zlib.decompressobj()
  bytes_seen = 0
  try:
    for piece in content_generator:
      bytes_seen += len(piece)
      out = inflater.decompress(piece, chunk_size)
      if out:
        yield out
      # Drain data zlib held back to honor the |chunk_size| cap.
      while inflater.unconsumed_tail:
        out = inflater.decompress(inflater.unconsumed_tail, chunk_size)
        if out:
          yield out
    trailing = inflater.flush()
    if trailing:
      yield trailing
  except zlib.error as exc:
    raise IOError(
        'Corrupted zip stream (read %d bytes) - %s' % (bytes_seen, exc))
  # Ensure all data was read and decompressed.
  if inflater.unused_data or inflater.unconsumed_tail:
    raise IOError('Not all data was decompressed')
197
198
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (store, no compression) for file types that are already
  compressed, 7 otherwise.
  """
  # os.path.splitext() keeps the leading dot ('.png') while
  # ALREADY_COMPRESSED_TYPES lists bare extensions ('png'); strip the dot so
  # the membership test can actually match.
  file_ext = os.path.splitext(filename)[1].lower().lstrip('.')
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
204
205
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files."""
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect every ancestor directory of every file, walking up to the root.
  needed = set()
  for f in files:
    parent = os.path.dirname(f)
    while parent and parent not in needed:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # sorted() guarantees parents are created before their children.
  for d in sorted(needed):
    fs.mkdir(os.path.join(base_directory, d))
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000218
219
def create_symlinks(base_directory, files):
  """Creates any symlinks needed by the given set of files."""
  for relpath, properties in files:
    # Only entries carrying the 'l' (link target) property are symlinks.
    if 'l' not in properties:
      continue
    if sys.platform == 'win32':
      # TODO(maruel): Create symlink via the win32 api.
      logging.warning('Ignoring symlink %s', relpath)
      continue
    destination = os.path.join(base_directory, relpath)
    # os.symlink() doesn't exist on Windows.
    os.symlink(properties['l'], destination)  # pylint: disable=E1101
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000232
233
def is_valid_file(path, size):
  """Determines if the given files appears valid.

  Currently it just checks the file's size.
  """
  if size == UNKNOWN_FILE_SIZE:
    # No expected size; existence is the only check we can do.
    return fs.isfile(path)
  actual = fs.stat(path).st_size
  if actual == size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(path), actual, size)
  return False
248
249
class Item(object):
  """An item to push to Storage.

  Its digest and size may be provided in advance, if known. Otherwise they will
  be derived from content(). If digest is provided, it MUST correspond to
  hash algorithm used by Storage.

  When used with Storage, Item starts its life in a main thread, travels
  to 'contains' thread, then to 'push' thread and then finally back to
  the main thread. It is never used concurrently from multiple threads.
  """

  def __init__(self, digest=None, size=None, high_priority=False):
    self.digest = digest
    self.size = size
    self.high_priority = high_priority
    # Default zlib level; subclasses may override per item.
    self.compression_level = 6

  def content(self):
    """Iterable with content of this item as byte string (str) chunks."""
    raise NotImplementedError()

  def prepare(self, hash_algo):
    """Ensures self.digest and self.size are set.

    Uses content() as a source of data to calculate them. Does nothing if digest
    and size is already known.

    Arguments:
      hash_algo: hash algorithm to use to calculate digest.
    """
    if self.digest is not None and self.size is not None:
      # Both already known, nothing to compute.
      return
    hasher = hash_algo()
    length = 0
    for chunk in self.content():
      hasher.update(chunk)
      length += len(chunk)
    self.digest = hasher.hexdigest()
    self.size = length
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000289
290
class FileItem(Item):
  """A file to push to Storage.

  Its digest and size may be provided in advance, if known. Otherwise they will
  be derived from the file content.
  """

  def __init__(self, path, digest=None, size=None, high_priority=False):
    # Fall back to the on-disk size when the caller did not provide one.
    if size is None:
      size = fs.stat(path).st_size
    super(FileItem, self).__init__(digest, size, high_priority)
    self.path = path
    # Pick a zip level based on the file extension.
    self.compression_level = get_zip_compression_level(path)

  def content(self):
    # Streamed from disk, so large files need not fit in memory.
    return file_read(self.path)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000308
309
class BufferItem(Item):
  """A byte buffer to push to Storage."""

  def __init__(self, buf, high_priority=False):
    # Size is the buffer length; the digest is left for Item.prepare().
    super(BufferItem, self).__init__(None, len(buf), high_priority)
    self.buffer = buf

  def content(self):
    # The whole payload is already in memory; yield it as a single chunk.
    return [self.buffer]
319
320
class Storage(object):
  """Efficiently downloads or uploads large set of files via StorageApi.

  Implements compression support, parallel 'contains' checks, parallel uploads
  and more.

  Works only within single namespace (and thus hashing algorithm and compression
  scheme are fixed).

  Spawns multiple internal threads. Thread safe, but not fork safe. Modifies
  signal handlers table to handle Ctrl+C.
  """

  def __init__(self, storage_api):
    """Initializes Storage on top of a concrete StorageApi instance.

    Arguments:
      storage_api: StorageApi-like object; its |namespace| determines whether
          compression is used and which hash algorithm names items.
    """
    self._storage_api = storage_api
    self._use_zip = isolated_format.is_namespace_with_compression(
        storage_api.namespace)
    self._hash_algo = isolated_format.get_hash_algo(storage_api.namespace)
    # Thread pools are created lazily via the properties below.
    self._cpu_thread_pool = None
    self._net_thread_pool = None
    # Flag checked (unlocked) by worker tasks to bail out early.
    self._aborted = False
    # Signal handlers saved in __enter__ and restored in __exit__.
    self._prev_sig_handlers = {}

  @property
  def hash_algo(self):
    """Hashing algorithm used to name files in storage based on their content.

    Defined by |namespace|. See also isolated_format.get_hash_algo().
    """
    return self._hash_algo

  @property
  def location(self):
    """URL of the backing store that this class is using."""
    return self._storage_api.location

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    return self._storage_api.namespace

  @property
  def cpu_thread_pool(self):
    """ThreadPool for CPU-bound tasks like zipping. Created on first use."""
    if self._cpu_thread_pool is None:
      threads = max(threading_utils.num_processors(), 2)
      # NOTE: 2L makes this module Python 2 only.
      if sys.maxsize <= 2L**32:
        # On 32 bits userland, do not try to use more than 16 threads.
        threads = min(threads, 16)
      self._cpu_thread_pool = threading_utils.ThreadPool(2, threads, 0, 'zip')
    return self._cpu_thread_pool

  @property
  def net_thread_pool(self):
    """AutoRetryThreadPool for IO-bound tasks, retries IOError."""
    if self._net_thread_pool is None:
      self._net_thread_pool = threading_utils.IOAutoRetryThreadPool()
    return self._net_thread_pool

  def close(self):
    """Waits for all pending tasks to finish."""
    logging.info('Waiting for all threads to die...')
    # join() waits for queued work before the pool is torn down.
    if self._cpu_thread_pool:
      self._cpu_thread_pool.join()
      self._cpu_thread_pool.close()
      self._cpu_thread_pool = None
    if self._net_thread_pool:
      self._net_thread_pool.join()
      self._net_thread_pool.close()
      self._net_thread_pool = None
    logging.info('Done.')

  def abort(self):
    """Cancels any pending or future operations."""
    # This is not strictly theadsafe, but in the worst case the logging message
    # will be printed twice. Not a big deal. In other places it is assumed that
    # unprotected reads and writes to _aborted are serializable (it is true
    # for python) and thus no locking is used.
    if not self._aborted:
      logging.warning('Aborting... It can take a while.')
      self._aborted = True

  def __enter__(self):
    """Context manager interface.

    Installs SIGINT/SIGTERM handlers that call abort(); the previous handlers
    are saved so __exit__ can restore them.
    """
    assert not self._prev_sig_handlers, self._prev_sig_handlers
    for s in (signal.SIGINT, signal.SIGTERM):
      self._prev_sig_handlers[s] = signal.signal(s, lambda *_args: self.abort())
    return self

  def __exit__(self, _exc_type, _exc_value, _traceback):
    """Context manager interface. Closes pools and restores signal handlers."""
    self.close()
    while self._prev_sig_handlers:
      s, h = self._prev_sig_handlers.popitem()
      signal.signal(s, h)
    return False

  def upload_items(self, items):
    """Uploads a bunch of items to the isolate server.

    It figures out what items are missing from the server and uploads only them.

    Arguments:
      items: list of Item instances that represents data to upload.

    Returns:
      List of items that were uploaded. All other items are already there.
    """
    logging.info('upload_items(items=%d)', len(items))

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # For each digest keep only first Item that matches it. All other items
    # are just indistinguishable copies from the point of view of isolate
    # server (it doesn't care about paths at all, only content and digests).
    seen = {}
    duplicates = 0
    for item in items:
      if seen.setdefault(item.digest, item) is not item:
        duplicates += 1
    items = seen.values()
    if duplicates:
      logging.info('Skipped %d files with duplicated content', duplicates)

    # Enqueue all upload tasks.
    missing = set()
    uploaded = []
    channel = threading_utils.TaskChannel()
    for missing_item, push_state in self.get_missing_items(items):
      missing.add(missing_item)
      self.async_push(channel, missing_item, push_state)

    # No need to spawn deadlock detector thread if there's nothing to upload.
    if missing:
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        # Wait for all started uploads to finish.
        while len(uploaded) != len(missing):
          detector.ping()
          item = channel.pull()
          uploaded.append(item)
          logging.debug(
              'Uploaded %d / %d: %s', len(uploaded), len(missing), item.digest)
      logging.info('All files are uploaded')

    # Print stats.
    total = len(items)
    total_size = sum(f.size for f in items)
    logging.info(
        'Total: %6d, %9.1fkb',
        total,
        total_size / 1024.)
    cache_hit = set(items) - missing
    cache_hit_size = sum(f.size for f in cache_hit)
    logging.info(
        'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_hit),
        cache_hit_size / 1024.,
        len(cache_hit) * 100. / total,
        cache_hit_size * 100. / total_size if total_size else 0)
    cache_miss = missing
    cache_miss_size = sum(f.size for f in cache_miss)
    logging.info(
        'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_miss),
        cache_miss_size / 1024.,
        len(cache_miss) * 100. / total,
        cache_miss_size * 100. / total_size if total_size else 0)

    return uploaded

  def async_push(self, channel, item, push_state):
    """Starts asynchronous push to the server in a parallel thread.

    Can be used only after |item| was checked for presence on a server with
    'get_missing_items' call. 'get_missing_items' returns |push_state| object
    that contains storage specific information describing how to upload
    the item (for example in case of cloud storage, it is signed upload URLs).

    Arguments:
      channel: TaskChannel that receives back |item| when upload ends.
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      None, but |channel| later receives back |item| when upload ends.
    """
    # Thread pool task priority.
    priority = (
        threading_utils.PRIORITY_HIGH if item.high_priority
        else threading_utils.PRIORITY_MED)

    def push(content):
      """Pushes an Item and returns it to |channel|."""
      if self._aborted:
        raise Aborted()
      item.prepare(self._hash_algo)
      self._storage_api.push(item, push_state, content)
      return item

    # If zipping is not required, just start a push task.
    if not self._use_zip:
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, item.content())
      return

    # If zipping is enabled, zip in a separate thread.
    def zip_and_push():
      # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
      # content right here. It will block until all file is zipped.
      try:
        if self._aborted:
          raise Aborted()
        stream = zip_compress(item.content(), item.compression_level)
        data = ''.join(stream)
      except Exception as exc:
        logging.error('Failed to zip \'%s\': %s', item, exc)
        channel.send_exception()
        return
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, [data])
    self.cpu_thread_pool.add_task(priority, zip_and_push)

  def push(self, item, push_state):
    """Synchronously pushes a single item to the server.

    If you need to push many items at once, consider using 'upload_items' or
    'async_push' with instance of TaskChannel.

    Arguments:
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      Pushed item (same object as |item|).
    """
    channel = threading_utils.TaskChannel()
    with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT):
      self.async_push(channel, item, push_state)
      pushed = channel.pull()
      assert pushed is item
    return item

  def async_fetch(self, channel, priority, digest, size, sink):
    """Starts asynchronous fetch from the server in a parallel thread.

    Arguments:
      channel: TaskChannel that receives back |digest| when download ends.
      priority: thread pool task priority for the fetch.
      digest: hex digest of an item to download.
      size: expected size of the item (after decompression).
      sink: function that will be called as sink(generator).
    """
    def fetch():
      try:
        # Prepare reading pipeline.
        stream = self._storage_api.fetch(digest)
        if self._use_zip:
          stream = zip_decompress(stream, isolated_format.DISK_FILE_CHUNK)
        # Run |stream| through verifier that will assert its size.
        verifier = FetchStreamVerifier(stream, size)
        # Verified stream goes to |sink|.
        sink(verifier.run())
      except Exception as err:
        logging.error('Failed to fetch %s: %s', digest, err)
        raise
      return digest

    # Don't bother with zip_thread_pool for decompression. Decompression is
    # really fast and most probably IO bound anyway.
    self.net_thread_pool.add_task_with_channel(channel, priority, fetch)

  def get_missing_items(self, items):
    """Yields items that are missing from the server.

    Issues multiple parallel queries via StorageApi's 'contains' method.

    Arguments:
      items: a list of Item objects to check.

    Yields:
      For each missing item it yields a pair (item, push_state), where:
        * item - Item object that is missing (one of |items|).
        * push_state - opaque object that contains storage specific information
            describing how to upload the item (for example in case of cloud
            storage, it is signed upload URLs). It can later be passed to
            'async_push'.
    """
    channel = threading_utils.TaskChannel()
    pending = 0

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    def contains(batch):
      if self._aborted:
        raise Aborted()
      return self._storage_api.contains(batch)

    # Enqueue all requests.
    for batch in batch_items_for_check(items):
      self.net_thread_pool.add_task_with_channel(
          channel, threading_utils.PRIORITY_HIGH, contains, batch)
      pending += 1

    # Yield results as they come in.
    for _ in xrange(pending):
      for missing_item, push_state in channel.pull().iteritems():
        yield missing_item, push_state
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000635
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000636
def batch_items_for_check(items):
  """Splits list of items to check for existence on the server into batches.

  Each batch corresponds to a single 'exists?' query to the server via a call
  to StorageApi's 'contains' method.

  Arguments:
    items: a list of Item objects.

  Yields:
    Batches of items to query for existence in a single operation,
    each batch is a list of Item objects.
  """
  # Batch size follows the ITEMS_PER_CONTAINS_QUERIES schedule: the limit for
  # batch N is the N-th entry, clamped to the last entry once exhausted.
  batches_emitted = 0
  limit = ITEMS_PER_CONTAINS_QUERIES[0]
  pending = []
  # Largest items first — presumably so that the longest uploads are
  # discovered (and can start) as early as possible.
  for item in sorted(items, key=lambda i: i.size, reverse=True):
    pending.append(item)
    if len(pending) == limit:
      yield pending
      pending = []
      batches_emitted += 1
      limit = ITEMS_PER_CONTAINS_QUERIES[
          min(batches_emitted, len(ITEMS_PER_CONTAINS_QUERIES) - 1)]
  # Flush the final, possibly short, batch.
  if pending:
    yield pending
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000663
664
class FetchQueue(object):
  """Fetches items from Storage and places them into LocalCache.

  It manages multiple concurrent fetch operations. Acts as a bridge between
  Storage and LocalCache so that Storage and LocalCache don't depend on each
  other at all.
  """

  def __init__(self, storage, cache):
    """Arguments:
      storage: Storage instance to download items through.
      cache: LocalCache instance that receives the downloaded content.
    """
    self.storage = storage
    self.cache = cache
    # Channel on which completed fetches report their digest back.
    self._channel = threading_utils.TaskChannel()
    # Digests currently being downloaded.
    self._pending = set()
    # Every digest ever requested via add(); used by verify_all_cached.
    self._accessed = set()
    # Digests known to be in |cache| already (seeded from the cache itself).
    self._fetched = cache.cached_set()

  def add(
      self,
      digest,
      size=UNKNOWN_FILE_SIZE,
      priority=threading_utils.PRIORITY_MED):
    """Starts asynchronous fetch of item |digest|.

    Arguments:
      digest: hex digest of the item to fetch.
      size: expected size of the item, or UNKNOWN_FILE_SIZE if not known.
      priority: thread pool priority for the network fetch.
    """
    # Fetching it now?
    if digest in self._pending:
      return

    # Mark this file as in use, verify_all_cached will later ensure it is still
    # in cache.
    self._accessed.add(digest)

    # Already fetched? Notify cache to update item's LRU position.
    if digest in self._fetched:
      # 'touch' returns True if item is in cache and not corrupted.
      if self.cache.touch(digest, size):
        return
      # Item is corrupted, remove it from cache and fetch it again.
      self._fetched.remove(digest)
      self.cache.evict(digest)

    # TODO(maruel): It should look at the free disk space, the current cache
    # size and the size of the new item on every new item:
    # - Trim the cache as more entries are listed when free disk space is low,
    #   otherwise if the amount of data downloaded during the run > free disk
    #   space, it'll crash.
    # - Make sure there's enough free disk space to fit all dependencies of
    #   this run! If not, abort early.

    # Start fetching. The completed download is written into the cache by the
    # partial'd cache.write sink and reported on self._channel.
    self._pending.add(digest)
    self.storage.async_fetch(
        self._channel, priority, digest, size,
        functools.partial(self.cache.write, digest))

  def wait(self, digests):
    """Starts a loop that waits for at least one of |digests| to be retrieved.

    All |digests| must already have been passed to add() (asserted below).

    Returns the first digest retrieved.
    """
    # Flush any already fetched items.
    for digest in digests:
      if digest in self._fetched:
        return digest

    # Ensure all requested items are being fetched now.
    assert all(digest in self._pending for digest in digests), (
        digests, self._pending)

    # Wait for some requested item to finish fetching. Each pull() also
    # retires unrelated completed fetches so their state stays consistent.
    while self._pending:
      digest = self._channel.pull()
      self._pending.remove(digest)
      self._fetched.add(digest)
      if digest in digests:
        return digest

    # Should never reach this point due to assert above.
    raise RuntimeError('Impossible state')

  def inject_local_file(self, path, algest=None, algo=None):
    """Adds local file to the cache as if it was fetched from storage."""
    with fs.open(path, 'rb') as f:
      data = f.read()
    digest = algo(data).hexdigest()
    self.cache.write(digest, [data])
    self._fetched.add(digest)
    return digest

  @property
  def pending_count(self):
    """Returns number of items to be fetched."""
    return len(self._pending)

  def verify_all_cached(self):
    """True if all accessed items are in cache."""
    return self._accessed.issubset(self.cache.cached_set())
760
761
class FetchStreamVerifier(object):
  """Verifies that fetched file is valid before passing it to the LocalCache.

  Wraps a chunk iterator and checks, while the chunks stream through, that
  the total size matches |expected_size| (when it is known).
  """

  def __init__(self, stream, expected_size):
    """Arguments:
      stream: an iterable yielding str chunks of the item's content.
      expected_size: total size in bytes the stream must produce, or
          UNKNOWN_FILE_SIZE to skip the size check.
    """
    assert stream is not None
    self.stream = stream
    self.expected_size = expected_size
    # Running total of bytes seen so far, updated by _inspect_chunk.
    self.current_size = 0

  def run(self):
    """Generator that yields same items as |stream|.

    Verifies |stream| is complete before yielding a last chunk to consumer.

    Also wraps IOError produced by consumer into MappingError exceptions since
    otherwise Storage will retry fetch on unrelated local cache errors.
    """
    # Read one chunk ahead, keep it in |stored|.
    # That way a complete stream can be verified before pushing last chunk
    # to consumer.
    stored = None
    for chunk in self.stream:
      assert chunk is not None
      if stored is not None:
        self._inspect_chunk(stored, is_last=False)
        try:
          yield stored
        except IOError as exc:
          raise isolated_format.MappingError(
              'Failed to store an item in cache: %s' % exc)
      stored = chunk
    if stored is not None:
      self._inspect_chunk(stored, is_last=True)
      try:
        yield stored
      except IOError as exc:
        raise isolated_format.MappingError(
            'Failed to store an item in cache: %s' % exc)
    elif self.expected_size not in (0, UNKNOWN_FILE_SIZE):
      # The stream yielded no chunks at all. Without this check an empty
      # (i.e. truncated) fetch of an item with a known non-zero size would
      # pass verification, since _inspect_chunk(..., is_last=True) is never
      # reached above.
      raise IOError(
          'Incorrect file size: expected %d, got 0' % self.expected_size)

  def _inspect_chunk(self, chunk, is_last):
    """Called for each fetched chunk before passing it to consumer.

    Raises IOError when the final chunk completes a stream whose total size
    differs from |expected_size|.
    """
    self.current_size += len(chunk)
    if (is_last and
        (self.expected_size != UNKNOWN_FILE_SIZE) and
        (self.expected_size != self.current_size)):
      raise IOError('Incorrect file size: expected %d, got %d' % (
          self.expected_size, self.current_size))
809
810
class StorageApi(object):
  """Interface for classes that implement low-level storage operations.

  StorageApi is oblivious of compression and hashing scheme used. These
  details are handled in higher level Storage class.

  Clients should generally not use StorageApi directly. Storage class is
  preferred since it implements compression and upload optimizations.
  """

  @property
  def location(self):
    """URL of the backing store that this class is using."""
    raise NotImplementedError()

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    raise NotImplementedError()

  def fetch(self, digest, offset=0):
    """Fetches an object and yields its content.

    Arguments:
      digest: hash digest of item to download.
      offset: offset (in bytes) from the start of the file to resume fetch from.

    Yields:
      Chunks of downloaded item (as str objects).
    """
    raise NotImplementedError()

  def push(self, item, push_state, content=None):
    """Uploads an |item| with content generated by |content| generator.

    |item| MUST go through 'contains' call to get |push_state| before it can
    be pushed to the storage.

    To be clear, here is one possible usage:
      all_items = [... all items to push as Item subclasses ...]
      for missing_item, push_state in storage_api.contains(all_items).items():
        storage_api.push(missing_item, push_state)

    When pushing to a namespace with compression, data that should be pushed
    and data provided by the item is not the same. In that case |content| is
    not None and it yields chunks of compressed data (using item.content() as
    a source of original uncompressed data). This is implemented by Storage
    class.

    Arguments:
      item: Item object that holds information about an item being pushed.
      push_state: push state object as returned by 'contains' call.
      content: a generator that yields chunks to push, item.content() if None.

    Returns:
      None.
    """
    raise NotImplementedError()

  def contains(self, items):
    """Checks for |items| on the server, prepares missing ones for upload.

    Arguments:
      items: list of Item objects to check for presence.

    Returns:
      A dict missing Item -> opaque push state object to be passed to 'push'.
      See doc string for 'push'.
    """
    raise NotImplementedError()
884
885
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800886class _IsolateServerPushState(object):
887 """Per-item state passed from IsolateServer.contains to IsolateServer.push.
Mike Frysinger27f03da2014-02-12 16:47:01 -0500888
889 Note this needs to be a global class to support pickling.
890 """
891
Cory Massarocc19c8c2015-03-10 13:35:11 -0700892 def __init__(self, preupload_status, size):
893 self.preupload_status = preupload_status
894 gs_upload_url = preupload_status.get('gs_upload_url') or None
895 if gs_upload_url:
896 self.upload_url = gs_upload_url
897 self.finalize_url = '_ah/api/isolateservice/v1/finalize_gs_upload'
898 else:
899 self.upload_url = '_ah/api/isolateservice/v1/store_inline'
900 self.finalize_url = None
Mike Frysinger27f03da2014-02-12 16:47:01 -0500901 self.uploaded = False
902 self.finalized = False
Marc-Antoine Ruele98dde92015-01-22 14:53:05 -0500903 self.size = size
Mike Frysinger27f03da2014-02-12 16:47:01 -0500904
905
class IsolateServer(StorageApi):
  """StorageApi implementation that downloads and uploads to Isolate Server.

  It uploads and downloads directly from Google Storage whenever appropriate.
  Works only within single namespace.
  """

  def __init__(self, base_url, namespace):
    """Arguments:
      base_url: root URL of the isolate server; must pass file_path.is_url.
      namespace: isolate namespace to work in; a '-gzip' or '-flate' suffix
          marks the namespace content as 'flate' compressed.
    """
    super(IsolateServer, self).__init__()
    assert file_path.is_url(base_url), base_url
    self._base_url = base_url.rstrip('/')
    self._namespace = namespace
    # Sent along with most API calls to identify the namespace and how its
    # content is encoded.
    self._namespace_dict = {
        'compression': 'flate' if namespace.endswith(
            ('-gzip', '-flate')) else '',
        'digest_hash': 'sha-1',
        'namespace': namespace,
    }
    # Protects _server_caps and _memory_use below.
    self._lock = threading.Lock()
    # Lazily populated /server_details response, see _server_capabilities.
    self._server_caps = None
    # Bytes of upload payloads currently held in memory; push() uses it to
    # throttle concurrent uploads.
    self._memory_use = 0

  @property
  def _server_capabilities(self):
    """Gets server details.

    The response is fetched once under the lock and cached afterwards.

    Returns:
      Server capabilities dictionary as returned by /server_details endpoint.
    """
    # TODO(maruel): Make this request much earlier asynchronously while the
    # files are being enumerated.

    # TODO(vadimsh): Put |namespace| in the URL so that server can apply
    # namespace-level ACLs to this call.

    with self._lock:
      if self._server_caps is None:
        self._server_caps = net.url_read_json(
            url='%s/_ah/api/isolateservice/v1/server_details' % self._base_url,
            data={})
      return self._server_caps

  @property
  def location(self):
    """URL of the backing store that this class is using."""
    return self._base_url

  @property
  def namespace(self):
    """Isolate namespace used by this storage."""
    return self._namespace

  def fetch(self, digest, offset=0):
    """Fetches an object and yields its content. See StorageApi.fetch."""
    assert offset >= 0
    source_url = '%s/_ah/api/isolateservice/v1/retrieve' % (
        self._base_url)
    logging.debug('download_file(%s, %d)', source_url, offset)
    response = self.do_fetch(source_url, digest, offset)

    if not response:
      raise IOError(
          'Attempted to fetch from %s; no data exist: %s / %s.' % (
            source_url, self._namespace, digest))

    # Small inline ('DB') entities come back base64 encoded in the response
    # itself.
    content = response.get('content')
    if content is not None:
      yield base64.b64decode(content)
      return

    # Larger entities are served from Google Storage via the returned URL.
    connection = net.url_open(response['url'])
    if not connection:
      raise IOError('Failed to download %s / %s' % (self._namespace, digest))

    # If |offset|, verify server respects it by checking Content-Range.
    if offset:
      content_range = connection.get_header('Content-Range')
      if not content_range:
        raise IOError('Missing Content-Range header')

      # 'Content-Range' format is 'bytes <offset>-<last_byte_index>/<size>'.
      # According to a spec, <size> can be '*' meaning "Total size of the file
      # is not known in advance".
      try:
        match = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', content_range)
        if not match:
          raise ValueError()
        content_offset = int(match.group(1))
        last_byte_index = int(match.group(2))
        size = None if match.group(3) == '*' else int(match.group(3))
      except ValueError:
        raise IOError('Invalid Content-Range header: %s' % content_range)

      # Ensure returned offset equals requested one.
      if offset != content_offset:
        raise IOError('Expecting offset %d, got %d (Content-Range is %s)' % (
            offset, content_offset, content_range))

      # Ensure entire tail of the file is returned.
      if size is not None and last_byte_index + 1 != size:
        raise IOError('Incomplete response. Content-Range: %s' % content_range)

    for data in connection.iter_content(NET_IO_FILE_CHUNK):
      yield data

  def push(self, item, push_state, content=None):
    """Uploads an |item|, then optionally finalizes it. See StorageApi.push."""
    assert isinstance(item, Item)
    assert item.digest is not None
    assert item.size is not None
    assert isinstance(push_state, _IsolateServerPushState)
    assert not push_state.finalized

    # Default to item.content().
    content = item.content() if content is None else content
    logging.info('Push state size: %d', push_state.size)
    if isinstance(content, (basestring, list)):
      # Memory is already used, too late.
      with self._lock:
        self._memory_use += push_state.size
    else:
      # TODO(vadimsh): Do not read from |content| generator when retrying push.
      # If |content| is indeed a generator, it can not be re-winded back to the
      # beginning of the stream. A retry will find it exhausted. A possible
      # solution is to wrap |content| generator with some sort of caching
      # restartable generator. It should be done alongside streaming support
      # implementation.
      #
      # In theory, we should keep the generator, so that it is not serialized in
      # memory. Sadly net.HttpService.request() requires the body to be
      # serialized.
      assert isinstance(content, types.GeneratorType), repr(content)
      slept = False
      # HACK HACK HACK. Please forgive me for my sins but OMG, it works!
      # One byte less than 512mb. This is to cope with incompressible content.
      max_size = int(sys.maxsize * 0.25)
      # Block until enough of the memory budget is free to serialize this
      # item, or until it is the only upload in flight (oversized items are
      # allowed through one at a time).
      while True:
        with self._lock:
          # This is due to 32 bits python when uploading very large files. The
          # problem is that it's comparing uncompressed sizes, while we care
          # about compressed sizes since it's what is serialized in memory.
          # The first check assumes large files are compressible and that by
          # throttling one upload at once, we can survive. Otherwise, kaboom.
          memory_use = self._memory_use
          if ((push_state.size >= max_size and not memory_use) or
              (memory_use + push_state.size <= max_size)):
            self._memory_use += push_state.size
            memory_use = self._memory_use
            break
        time.sleep(0.1)
        slept = True
      if slept:
        logging.info('Unblocked: %d %d', memory_use, push_state.size)

    try:
      # This push operation may be a retry after failed finalization call below,
      # no need to reupload contents in that case.
      if not push_state.uploaded:
        # PUT file to |upload_url|.
        success = self.do_push(push_state, content)
        if not success:
          raise IOError('Failed to upload file with hash %s to URL %s' % (
              item.digest, push_state.upload_url))
        push_state.uploaded = True
      else:
        logging.info(
            'A file %s already uploaded, retrying finalization only',
            item.digest)

      # Optionally notify the server that it's done.
      if push_state.finalize_url:
        # TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
        # send it to isolated server. That way isolate server can verify that
        # the data safely reached Google Storage (GS provides MD5 and CRC32C of
        # stored files).
        # TODO(maruel): Fix the server to accept properly data={} so
        # url_read_json() can be used.
        response = net.url_read_json(
            url='%s/%s' % (self._base_url, push_state.finalize_url),
            data={
                'upload_ticket': push_state.preupload_status['upload_ticket'],
            })
        if not response or not response['ok']:
          raise IOError('Failed to finalize file with hash %s.' % item.digest)
        push_state.finalized = True
    finally:
      # Always release the memory budget, even on failure, so other uploads
      # are not starved.
      with self._lock:
        self._memory_use -= push_state.size

  def contains(self, items):
    """Checks which |items| are missing on the server. See StorageApi.contains."""
    # Ensure all items were initialized with 'prepare' call. Storage does that.
    assert all(i.digest is not None and i.size is not None for i in items)

    # Request body is a json encoded list of dicts.
    body = {
        'items': [
          {
            'digest': item.digest,
            'is_isolated': bool(item.high_priority),
            'size': item.size,
          } for item in items
        ],
        'namespace': self._namespace_dict,
    }

    query_url = '%s/_ah/api/isolateservice/v1/preupload' % self._base_url

    # Response body is a list of push_urls (or null if file is already present).
    response = None
    try:
      response = net.url_read_json(url=query_url, data=body)
      if response is None:
        raise isolated_format.MappingError(
            'Failed to execute preupload query')
    except ValueError as err:
      raise isolated_format.MappingError(
          'Invalid response from server: %s, body is %s' % (err, response))

    # Pick Items that are missing, attach _PushState to them. The response
    # entries carry an 'index' back into the request's |items| list.
    missing_items = {}
    for preupload_status in response.get('items', []):
      assert 'upload_ticket' in preupload_status, (
          preupload_status, '/preupload did not generate an upload ticket')
      index = int(preupload_status['index'])
      missing_items[items[index]] = _IsolateServerPushState(
          preupload_status, items[index].size)
    logging.info('Queried %d files, %d cache hit',
        len(items), len(items) - len(missing_items))
    return missing_items

  def do_fetch(self, url, digest, offset):
    """Fetches isolated data from the URL.

    Used only for fetching files, not for API calls. Can be overridden in
    subclasses.

    Args:
      url: URL to fetch the data from, can possibly return http redirect.
      digest: hex digest of the item to fetch.
      offset: byte offset inside the file to start fetching from.

    Returns:
      net.HttpResponse compatible object, with 'read' and 'get_header' calls.
    """
    assert isinstance(offset, int)
    data = {
        'digest': digest.encode('utf-8'),
        'namespace': self._namespace_dict,
        'offset': offset,
    }
    # TODO(maruel): url + '?' + urllib.urlencode(data) once a HTTP GET endpoint
    # is added.
    return net.url_read_json(
        url=url,
        data=data,
        read_timeout=DOWNLOAD_READ_TIMEOUT)

  def do_push(self, push_state, content):
    """Uploads isolated file to the URL.

    Used only for storing files, not for API calls. Can be overridden in
    subclasses.

    Args:
      push_state: an _IsolateServerPushState instance describing where and how
          to upload (inline store vs. direct Google Storage PUT).
      content: an iterable that yields 'str' chunks.

    Returns:
      True on success (for GS uploads, any non-None response counts).
    """
    # A cheezy way to avoid memcpy of (possibly huge) file, until streaming
    # upload support is implemented.
    if isinstance(content, list) and len(content) == 1:
      content = content[0]
    else:
      content = ''.join(content)

    # Inline ('DB') upload: base64 content plus the upload ticket as JSON.
    if not push_state.finalize_url:
      url = '%s/%s' % (self._base_url, push_state.upload_url)
      content = base64.b64encode(content)
      data = {
          'upload_ticket': push_state.preupload_status['upload_ticket'],
          'content': content,
      }
      response = net.url_read_json(url=url, data=data)
      return response is not None and response['ok']

    # Direct upload to Google Storage via the signed URL.
    url = push_state.upload_url
    response = net.url_read(
        content_type='application/octet-stream',
        data=content,
        method='PUT',
        headers={'Cache-Control': 'public, max-age=31536000'},
        url=url)
    return response is not None
1199
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001200
class LocalCache(object):
  """Local cache that stores objects fetched via Storage.

  It can be accessed concurrently from multiple threads, so it should protect
  its internal state with some lock.
  """
  # Directory backing the cache; stays None for caches without disk storage
  # (disk-backed subclasses assign it, see DiskCache).
  cache_dir = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface. Does not suppress exceptions."""
    return False

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Removes item from cache if it's there."""
    raise NotImplementedError()

  def read(self, digest):
    """Returns contents of the cached item as a single str."""
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache."""
    raise NotImplementedError()

  def hardlink(self, digest, dest, file_mode):
    """Ensures file at |dest| has same content as cached |digest|.

    If file_mode is provided, it is used to set the executable bit if
    applicable.
    """
    raise NotImplementedError()
1252
1253
1254class MemoryCache(LocalCache):
1255 """LocalCache implementation that stores everything in memory."""
1256
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001257 def __init__(self, file_mode_mask=0500):
1258 """Args:
1259 file_mode_mask: bit mask to AND file mode with. Default value will make
1260 all mapped files to be read only.
1261 """
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001262 super(MemoryCache, self).__init__()
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001263 self._file_mode_mask = file_mode_mask
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001264 # Let's not assume dict is thread safe.
1265 self._lock = threading.Lock()
1266 self._contents = {}
1267
1268 def cached_set(self):
1269 with self._lock:
1270 return set(self._contents)
1271
1272 def touch(self, digest, size):
1273 with self._lock:
1274 return digest in self._contents
1275
1276 def evict(self, digest):
1277 with self._lock:
1278 self._contents.pop(digest, None)
1279
1280 def read(self, digest):
1281 with self._lock:
1282 return self._contents[digest]
1283
1284 def write(self, digest, content):
1285 # Assemble whole stream before taking the lock.
1286 data = ''.join(content)
1287 with self._lock:
1288 self._contents[digest] = data
1289
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001290 def hardlink(self, digest, dest, file_mode):
1291 """Since data is kept in memory, there is no filenode to hardlink."""
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001292 file_write(dest, [self.read(digest)])
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001293 if file_mode is not None:
maruel12e30012015-10-09 11:55:35 -07001294 fs.chmod(dest, file_mode & self._file_mode_mask)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001295
1296
class CachePolicies(object):
  """Simple value object holding the cache retention limits.

  Attributes:
    max_cache_size: Trim if the cache gets larger than this value. If 0, the
        cache is effectively a leak.
    min_free_space: Trim if disk free space becomes lower than this value. If
        0, it unconditionally fills the disk.
    max_items: Maximum number of items to keep in the cache. If 0, do not
        enforce a limit.
  """
  def __init__(self, max_cache_size, min_free_space, max_items):
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
1311
1312
1313class DiskCache(LocalCache):
1314 """Stateful LRU cache in a flat hash table in a directory.
1315
1316 Saves its state as json file.
1317 """
maruel12e30012015-10-09 11:55:35 -07001318 STATE_FILE = u'state.json'
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001319
1320 def __init__(self, cache_dir, policies, hash_algo):
1321 """
1322 Arguments:
1323 cache_dir: directory where to place the cache.
1324 policies: cache retention policies.
1325 algo: hashing algorithm used.
1326 """
1327 super(DiskCache, self).__init__()
1328 self.cache_dir = cache_dir
1329 self.policies = policies
1330 self.hash_algo = hash_algo
1331 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
1332
1333 # All protected methods (starting with '_') except _path should be called
1334 # with this lock locked.
1335 self._lock = threading_utils.LockWithAssert()
1336 self._lru = lru.LRUDict()
1337
1338 # Profiling values.
1339 self._added = []
1340 self._removed = []
1341 self._free_disk = 0
1342
1343 with tools.Profiler('Setup'):
1344 with self._lock:
1345 self._load()
1346
1347 def __enter__(self):
1348 return self
1349
1350 def __exit__(self, _exc_type, _exec_value, _traceback):
1351 with tools.Profiler('CleanupTrimming'):
1352 with self._lock:
1353 self._trim()
1354
1355 logging.info(
1356 '%5d (%8dkb) added',
1357 len(self._added), sum(self._added) / 1024)
1358 logging.info(
1359 '%5d (%8dkb) current',
1360 len(self._lru),
1361 sum(self._lru.itervalues()) / 1024)
1362 logging.info(
1363 '%5d (%8dkb) removed',
1364 len(self._removed), sum(self._removed) / 1024)
1365 logging.info(
1366 ' %8dkb free',
1367 self._free_disk / 1024)
1368 return False
1369
1370 def cached_set(self):
1371 with self._lock:
1372 return self._lru.keys_set()
1373
1374 def touch(self, digest, size):
1375 """Verifies an actual file is valid.
1376
1377 Note that is doesn't compute the hash so it could still be corrupted if the
1378 file size didn't change.
1379
1380 TODO(maruel): More stringent verification while keeping the check fast.
1381 """
1382 # Do the check outside the lock.
1383 if not is_valid_file(self._path(digest), size):
1384 return False
1385
1386 # Update it's LRU position.
1387 with self._lock:
1388 if digest not in self._lru:
1389 return False
1390 self._lru.touch(digest)
1391 return True
1392
1393 def evict(self, digest):
1394 with self._lock:
1395 self._lru.pop(digest)
1396 self._delete_file(digest, UNKNOWN_FILE_SIZE)
1397
1398 def read(self, digest):
maruel12e30012015-10-09 11:55:35 -07001399 with fs.open(self._path(digest), 'rb') as f:
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001400 return f.read()
1401
1402 def write(self, digest, content):
Marc-Antoine Rueldf4976d2015-04-15 19:56:21 -04001403 assert content is not None
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001404 path = self._path(digest)
1405 # A stale broken file may remain. It is possible for the file to have write
1406 # access bit removed which would cause the file_write() call to fail to open
1407 # in write mode. Take no chance here.
1408 file_path.try_remove(path)
1409 try:
1410 size = file_write(path, content)
1411 except:
1412 # There are two possible places were an exception can occur:
1413 # 1) Inside |content| generator in case of network or unzipping errors.
1414 # 2) Inside file_write itself in case of disk IO errors.
1415 # In any case delete an incomplete file and propagate the exception to
1416 # caller, it will be logged there.
1417 file_path.try_remove(path)
1418 raise
1419 # Make the file read-only in the cache. This has a few side-effects since
1420 # the file node is modified, so every directory entries to this file becomes
1421 # read-only. It's fine here because it is a new file.
1422 file_path.set_read_only(path, True)
1423 with self._lock:
1424 self._add(digest, size)
1425
1426 def hardlink(self, digest, dest, file_mode):
1427 """Hardlinks the file to |dest|.
1428
1429 Note that the file permission bits are on the file node, not the directory
1430 entry, so changing the access bit on any of the directory entries for the
1431 file node will affect them all.
1432 """
1433 path = self._path(digest)
maruel1f7e8162015-09-16 10:35:43 -07001434 if not file_path.link_file(dest, path, file_path.HARDLINK_WITH_FALLBACK):
1435 # Report to the server that it failed with more details. We'll want to
1436 # squash them all.
1437 on_error.report('Failed to hardlink\n%s -> %s' % (path, dest))
1438
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001439 if file_mode is not None:
1440 # Ignores all other bits.
maruel12e30012015-10-09 11:55:35 -07001441 fs.chmod(dest, file_mode & 0500)
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001442
1443 def _load(self):
1444 """Loads state of the cache from json file."""
1445 self._lock.assert_locked()
1446
1447 if not os.path.isdir(self.cache_dir):
maruel12e30012015-10-09 11:55:35 -07001448 fs.makedirs(self.cache_dir)
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001449 else:
1450 # Make sure the cache is read-only.
1451 # TODO(maruel): Calculate the cost and optimize the performance
1452 # accordingly.
1453 file_path.make_tree_read_only(self.cache_dir)
1454
1455 # Load state of the cache.
maruel12e30012015-10-09 11:55:35 -07001456 if fs.isfile(self.state_file):
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001457 try:
1458 self._lru = lru.LRUDict.load(self.state_file)
1459 except ValueError as err:
1460 logging.error('Failed to load cache state: %s' % (err,))
1461 # Don't want to keep broken state file.
1462 file_path.try_remove(self.state_file)
1463
1464 # Ensure that all files listed in the state still exist and add new ones.
1465 previous = self._lru.keys_set()
1466 unknown = []
maruel12e30012015-10-09 11:55:35 -07001467 for filename in fs.listdir(self.cache_dir):
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001468 if filename == self.STATE_FILE:
1469 continue
1470 if filename in previous:
1471 previous.remove(filename)
1472 continue
1473 # An untracked file.
1474 if not isolated_format.is_valid_hash(filename, self.hash_algo):
1475 logging.warning('Removing unknown file %s from cache', filename)
Marc-Antoine Ruel8cd33372015-02-09 12:54:43 -05001476 p = self._path(filename)
maruel12e30012015-10-09 11:55:35 -07001477 if fs.isdir(p):
Marc-Antoine Ruel8cd33372015-02-09 12:54:43 -05001478 try:
1479 file_path.rmtree(p)
1480 except OSError:
1481 pass
1482 else:
1483 file_path.try_remove(p)
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001484 continue
1485 # File that's not referenced in 'state.json'.
1486 # TODO(vadimsh): Verify its SHA1 matches file name.
1487 logging.warning('Adding unknown file %s to cache', filename)
1488 unknown.append(filename)
1489
1490 if unknown:
1491 # Add as oldest files. They will be deleted eventually if not accessed.
1492 self._add_oldest_list(unknown)
1493 logging.warning('Added back %d unknown files', len(unknown))
1494
1495 if previous:
1496 # Filter out entries that were not found.
1497 logging.warning('Removed %d lost files', len(previous))
1498 for filename in previous:
1499 self._lru.pop(filename)
1500 self._trim()
1501
1502 def _save(self):
1503 """Saves the LRU ordering."""
1504 self._lock.assert_locked()
1505 if sys.platform != 'win32':
1506 d = os.path.dirname(self.state_file)
maruel12e30012015-10-09 11:55:35 -07001507 if fs.isdir(d):
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001508 # Necessary otherwise the file can't be created.
1509 file_path.set_read_only(d, False)
maruel12e30012015-10-09 11:55:35 -07001510 if fs.isfile(self.state_file):
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001511 file_path.set_read_only(self.state_file, False)
1512 self._lru.save(self.state_file)
1513
1514 def _trim(self):
1515 """Trims anything we don't know, make sure enough free space exists."""
1516 self._lock.assert_locked()
1517
1518 # Ensure maximum cache size.
1519 if self.policies.max_cache_size:
1520 total_size = sum(self._lru.itervalues())
1521 while total_size > self.policies.max_cache_size:
1522 total_size -= self._remove_lru_file()
1523
1524 # Ensure maximum number of items in the cache.
1525 if self.policies.max_items and len(self._lru) > self.policies.max_items:
1526 for _ in xrange(len(self._lru) - self.policies.max_items):
1527 self._remove_lru_file()
1528
1529 # Ensure enough free space.
1530 self._free_disk = file_path.get_free_space(self.cache_dir)
1531 trimmed_due_to_space = False
1532 while (
1533 self.policies.min_free_space and
1534 self._lru and
1535 self._free_disk < self.policies.min_free_space):
1536 trimmed_due_to_space = True
1537 self._remove_lru_file()
1538 self._free_disk = file_path.get_free_space(self.cache_dir)
1539 if trimmed_due_to_space:
1540 total_usage = sum(self._lru.itervalues())
1541 usage_percent = 0.
1542 if total_usage:
1543 usage_percent = 100. * self.policies.max_cache_size / float(total_usage)
1544 logging.warning(
1545 'Trimmed due to not enough free disk space: %.1fkb free, %.1fkb '
1546 'cache (%.1f%% of its maximum capacity)',
1547 self._free_disk / 1024.,
1548 total_usage / 1024.,
1549 usage_percent)
1550 self._save()
1551
1552 def _path(self, digest):
1553 """Returns the path to one item."""
1554 return os.path.join(self.cache_dir, digest)
1555
1556 def _remove_lru_file(self):
1557 """Removes the last recently used file and returns its size."""
1558 self._lock.assert_locked()
1559 digest, size = self._lru.pop_oldest()
1560 self._delete_file(digest, size)
1561 return size
1562
1563 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
1564 """Adds an item into LRU cache marking it as a newest one."""
1565 self._lock.assert_locked()
1566 if size == UNKNOWN_FILE_SIZE:
maruel12e30012015-10-09 11:55:35 -07001567 size = fs.stat(self._path(digest)).st_size
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001568 self._added.append(size)
1569 self._lru.add(digest, size)
1570
1571 def _add_oldest_list(self, digests):
1572 """Adds a bunch of items into LRU cache marking them as oldest ones."""
1573 self._lock.assert_locked()
1574 pairs = []
1575 for digest in digests:
maruel12e30012015-10-09 11:55:35 -07001576 size = fs.stat(self._path(digest)).st_size
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001577 self._added.append(size)
1578 pairs.append((digest, size))
1579 self._lru.batch_insert_oldest(pairs)
1580
1581 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
1582 """Deletes cache file from the file system."""
1583 self._lock.assert_locked()
1584 try:
1585 if size == UNKNOWN_FILE_SIZE:
maruel12e30012015-10-09 11:55:35 -07001586 size = fs.stat(self._path(digest)).st_size
Marc-Antoine Ruele4ad07e2014-10-15 20:22:29 -04001587 file_path.try_remove(self._path(digest))
1588 self._removed.append(size)
1589 except OSError as e:
1590 logging.error('Error attempting to delete a file %s:\n%s' % (digest, e))
1591
1592
class IsolatedBundle(object):
  """Fetched and parsed .isolated file with all dependencies."""

  def __init__(self):
    # Command to run, as a list of arguments (filled in by _update_self).
    self.command = []
    # Maps relative file path -> properties dict (keys like 'h', 's', 'm').
    self.files = {}
    # Tri-state: None until some *.isolated file sets it.
    self.read_only = None
    # Working directory relative to the output dir; defaults to '' after fetch.
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def fetch(self, fetch_queue, root_isolated_hash, algo):
    """Fetches the .isolated and all the included .isolated.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    As a side effect this method starts asynchronous fetch of all data files
    by adding them to |fetch_queue|. It doesn't wait for data files to finish
    fetching though.
    """
    self.root = isolated_format.IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()
    # Set of IsolatedFile's whose data files have already being fetched.
    processed = set()

    def retrieve_async(isolated_file):
      # Enqueues one *.isolated file for fetching, guarding against cycles.
      h = isolated_file.obj_hash
      if h in seen:
        raise isolated_format.IsolatedError(
            'IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      fetch_queue.add(h, priority=threading_utils.PRIORITY_HIGH)

    # Start fetching root *.isolated file (single file, not the whole bundle).
    retrieve_async(self.root)

    while pending:
      # Wait until some *.isolated file is fetched, parse it.
      item_hash = fetch_queue.wait(pending)
      item = pending.pop(item_hash)
      item.load(fetch_queue.cache.read(item_hash))

      # Start fetching included *.isolated files.
      for new_child in item.children:
        retrieve_async(new_child)

      # Always fetch *.isolated files in traversal order, waiting if necessary
      # until next to-be-processed node loads. "Waiting" is done by yielding
      # back to the outer loop, that waits until some *.isolated is loaded.
      for node in isolated_format.walk_includes(self.root):
        if node not in processed:
          # Not visited, and not yet loaded -> wait for it to load.
          if not node.is_loaded:
            break
          # Not visited and loaded -> process it and continue the traversal.
          self._start_fetching_files(node, fetch_queue)
          processed.add(node)

    # All *.isolated files should be processed by now and only them.
    all_isolateds = set(isolated_format.walk_includes(self.root))
    assert all_isolateds == processed, (all_isolateds, processed)

    # Extract 'command' and other bundle properties.
    for node in isolated_format.walk_includes(self.root):
      self._update_self(node)
    self.relative_cwd = self.relative_cwd or ''

  def _start_fetching_files(self, isolated, fetch_queue):
    """Starts fetching files from |isolated| that are not yet being fetched.

    Modifies self.files.
    """
    logging.debug('fetch_files(%s)', isolated.obj_hash)
    for filepath, properties in isolated.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overridden files must not be fetched.
      if filepath not in self.files:
        self.files[filepath] = properties
        if 'h' in properties:
          # Preemptively request files.
          logging.debug('fetching %s', filepath)
          fetch_queue.add(
              properties['h'], properties['s'], threading_utils.PRIORITY_MED)

  def _update_self(self, node):
    """Extracts bundle global parameters from loaded *.isolated file.

    Will be called with each loaded *.isolated file in order of traversal of
    isolated include graph (see isolated_format.walk_includes).
    """
    # Grabs properties. First node (in traversal order) to set a property wins;
    # later nodes can't override it since each 'if' checks it is still unset.
    if not self.command and node.data.get('command'):
      # Ensure paths are correctly separated on windows.
      self.command = node.data['command']
      if self.command:
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1714
1715
def set_storage_api_class(cls):
  """Replaces StorageApi implementation used by default.

  May only be called once: asserts that the default was not already replaced.
  After this call, get_storage_api() instantiates |cls| instead of
  IsolateServer.
  """
  global _storage_api_cls
  assert _storage_api_cls is None
  assert issubclass(cls, StorageApi)
  _storage_api_cls = cls
1722
1723
def get_storage_api(url, namespace):
  """Returns an object that implements low-level StorageApi interface.

  It is used by Storage to work with single isolate |namespace|. It should
  rarely be used directly by clients, see 'get_storage' for
  a better alternative.

  Arguments:
    url: URL of isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of StorageApi subclass.
  """
  # Honor a replacement class installed via set_storage_api_class, if any.
  if _storage_api_cls is not None:
    return _storage_api_cls(url, namespace)
  return IsolateServer(url, namespace)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001742
1743
def get_storage(url, namespace):
  """Returns Storage class that can upload and download from |namespace|.

  Arguments:
    url: URL of isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of Storage.
  """
  # Wrap the low-level StorageApi implementation with the high-level Storage.
  storage_api = get_storage_api(url, namespace)
  return Storage(storage_api)
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001757
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001758
def upload_tree(base_url, infiles, namespace):
  """Uploads the given tree to the given url.

  Arguments:
    base_url: The url of the isolate server to upload to.
    namespace: The namespace to use on the server.
    infiles: iterable of pairs (absolute path, metadata dict) of files.
  """
  # Build FileItem instances out of |infiles|, dropping duplicate paths.
  # Symlinks ('l' property) are dropped too: they are not represented by items
  # on the isolate server side.
  to_upload = []
  already_seen = set()
  ignored = 0
  for abspath, meta in infiles:
    assert isinstance(abspath, unicode), abspath
    if 'l' in meta or abspath in already_seen:
      ignored += 1
      continue
    already_seen.add(abspath)
    to_upload.append(
        FileItem(
            path=abspath,
            digest=meta['h'],
            size=meta['s'],
            high_priority=meta.get('priority') == '0'))

  logging.info('Skipped %d duplicated entries', ignored)
  with get_storage(base_url, namespace) as storage:
    storage.upload_items(to_upload)
Vadim Shtayura3148e072014-09-02 18:51:52 -07001789
1790
def fetch_isolated(isolated_hash, storage, cache, outdir, require_command):
  """Aggressively downloads the .isolated file(s), then downloads all the files.

  Arguments:
    isolated_hash: hash of the root *.isolated file.
    storage: Storage class that communicates with isolate storage.
    cache: LocalCache class that knows how to store and map files locally.
    outdir: Output directory to map file tree to.
    require_command: Ensure *.isolated specifies a command to run.

  Returns:
    IsolatedBundle object that holds details about loaded *.isolated file.
  """
  logging.debug(
      'fetch_isolated(%s, %s, %s, %s, %s)',
      isolated_hash, storage, cache, outdir, require_command)
  # Hash algorithm to use, defined by namespace |storage| is using.
  algo = storage.hash_algo
  # Using the cache as a context manager lets implementations like DiskCache
  # trim themselves and save their state in __exit__.
  with cache:
    fetch_queue = FetchQueue(storage, cache)
    bundle = IsolatedBundle()

    with tools.Profiler('GetIsolateds'):
      # Optionally support local files by manually adding them to cache.
      if not isolated_format.is_valid_hash(isolated_hash, algo):
        logging.debug('%s is not a valid hash, assuming a file', isolated_hash)
        path = unicode(os.path.abspath(isolated_hash))
        try:
          # Replaces |isolated_hash| with the actual digest of the local file.
          isolated_hash = fetch_queue.inject_local_file(path, algo)
        except IOError:
          raise isolated_format.MappingError(
              '%s doesn\'t seem to be a valid file. Did you intent to pass a '
              'valid hash?' % isolated_hash)

    # Load all *.isolated and start loading rest of the files.
    bundle.fetch(fetch_queue, isolated_hash, algo)
    if require_command and not bundle.command:
      # TODO(vadimsh): All fetch operations are already enqueue and there's no
      # easy way to cancel them.
      raise IsolatedErrorNoCommand()

    with tools.Profiler('GetRest'):
      # Create file system hierarchy.
      if not fs.isdir(outdir):
        fs.makedirs(outdir)
      create_directories(outdir, bundle.files)
      create_symlinks(outdir, bundle.files.iteritems())

      # Ensure working directory exists.
      cwd = os.path.normpath(os.path.join(outdir, bundle.relative_cwd))
      if not fs.isdir(cwd):
        fs.makedirs(cwd)

      # Multimap: digest -> list of pairs (path, props). Several mapped files
      # can share the same content, hence the list.
      remaining = {}
      for filepath, props in bundle.files.iteritems():
        if 'h' in props:
          remaining.setdefault(props['h'], []).append((filepath, props))

      # Now block on the remaining files to be downloaded and mapped.
      logging.info('Retrieving remaining files (%d of them)...',
          fetch_queue.pending_count)
      last_update = time.time()
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        while remaining:
          detector.ping()

          # Wait for any item to finish fetching to cache.
          digest = fetch_queue.wait(remaining)

          # Link corresponding files to a fetched item in cache.
          for filepath, props in remaining.pop(digest):
            cache.hardlink(
                digest, os.path.join(outdir, filepath), props.get('m'))

          # Report progress. Note that progress is printed to stdout, not just
          # logged, so interactive users see it.
          duration = time.time() - last_update
          if duration > DELAY_BETWEEN_UPDATES_IN_SECS:
            msg = '%d files remaining...' % len(remaining)
            print msg
            logging.info(msg)
            last_update = time.time()

  # Cache could evict some items we just tried to fetch, it's a fatal error.
  if not fetch_queue.verify_all_cached():
    raise isolated_format.MappingError(
        'Cache is too small to hold all requested files')
  return bundle
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001879
1880
def directory_to_metadata(root, algo, blacklist):
  """Returns the FileItem list and .isolated metadata for a directory."""
  root = file_path.get_native_path_case(root)
  relpaths = isolated_format.expand_directory_and_symlink(
      root, '.' + os.path.sep, blacklist, sys.platform != 'win32')
  metadata = {}
  for relpath in relpaths:
    meta = isolated_format.file_to_metadata(
        os.path.join(root, relpath), {}, 0, algo)
    # The file timestamp is not embedded in the .isolated metadata.
    meta.pop('t')
    metadata[relpath] = meta
  items = []
  for relpath, meta in metadata.iteritems():
    # Entries without a digest (e.g. symlinks) have no content to upload.
    if 'h' not in meta:
      continue
    items.append(
        FileItem(
            path=os.path.join(root, relpath),
            digest=meta['h'],
            size=meta['s'],
            high_priority=relpath.endswith('.isolated')))
  return items, metadata
1902
1903
def archive_files_to_storage(storage, files, blacklist):
  """Stores every entry and returns the relevant data.

  Arguments:
    storage: a Storage object that communicates with the remote object store.
    files: list of file paths to upload. If a directory is specified, a
        .isolated file is created and its hash is returned.
    blacklist: function that returns True if a file should be omitted.

  Returns:
    List of (hash, path) pairs, one per entry in |files|, in the same order.

  Raises:
    Error: on duplicate entries, unknown paths or OS-level failures.
  """
  assert all(isinstance(i, unicode) for i in files), files
  if len(files) != len(set(map(os.path.abspath, files))):
    raise Error('Duplicate entries found.')

  results = []
  # The temporary directory is only created as needed.
  tempdir = None
  try:
    # TODO(maruel): Yield the files to a worker thread.
    items_to_upload = []
    for f in files:
      try:
        filepath = os.path.abspath(f)
        if fs.isdir(filepath):
          # Uploading a whole directory.
          items, metadata = directory_to_metadata(
              filepath, storage.hash_algo, blacklist)

          # Create the .isolated file.
          if not tempdir:
            tempdir = tempfile.mkdtemp(prefix=u'isolateserver')
          handle, isolated = tempfile.mkstemp(dir=tempdir, suffix=u'.isolated')
          os.close(handle)
          data = {
              'algo':
                  isolated_format.SUPPORTED_ALGOS_REVERSE[storage.hash_algo],
              'files': metadata,
              'version': isolated_format.ISOLATED_FILE_VERSION,
          }
          isolated_format.save_isolated(isolated, data)
          h = isolated_format.hash_file(isolated, storage.hash_algo)
          items_to_upload.extend(items)
          items_to_upload.append(
              FileItem(
                  path=isolated,
                  digest=h,
                  size=fs.stat(isolated).st_size,
                  high_priority=True))
          # The directory is represented by the hash of its .isolated file.
          results.append((h, f))

        elif fs.isfile(filepath):
          h = isolated_format.hash_file(filepath, storage.hash_algo)
          items_to_upload.append(
              FileItem(
                  path=filepath,
                  digest=h,
                  size=fs.stat(filepath).st_size,
                  high_priority=f.endswith('.isolated')))
          results.append((h, f))
        else:
          raise Error('%s is neither a file or directory.' % f)
      except OSError:
        # NOTE(review): the original OSError details are discarded here.
        raise Error('Failed to process %s.' % f)
    # Technically we would care about which files were uploaded but we don't
    # much in practice.
    _uploaded_files = storage.upload_items(items_to_upload)
    return results
  finally:
    # Clean up the temporary .isolated files even on failure.
    if tempdir and fs.isdir(tempdir):
      file_path.rmtree(tempdir)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001973
1974
def archive(out, namespace, files, blacklist):
  """Archives |files| to the Isolate Server and prints the resulting hashes.

  Arguments:
    out: URL of the Isolate Server.
    namespace: namespace to upload the items to.
    files: list of paths to upload; the single entry ['-'] means read one
        path per line from stdin instead.
    blacklist: list of regexps used to filter out files when uploading
        directories.

  Raises:
    Error: if there is nothing to upload.
  """
  if files == ['-']:
    # readlines() keeps the trailing newline on every line; strip it so each
    # entry is usable as a path, and drop lines left empty by the stripping.
    files = [l.rstrip('\r\n') for l in sys.stdin.readlines()]
    files = [l for l in files if l]

  if not files:
    raise Error('Nothing to upload')

  # archive_files_to_storage() asserts its inputs are unicode.
  files = [f.decode('utf-8') for f in files]
  blacklist = tools.gen_blacklist(blacklist)
  with get_storage(out, namespace) as storage:
    results = archive_files_to_storage(storage, files, blacklist)
  print('\n'.join('%s %s' % (r[0], r[1]) for r in results))
1987
1988
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server.

  When given a directory, its whole content is uploaded and a .isolated file
  describing it is created; that .isolated file can then be included in
  another one to run commands.

  For every entry processed, its content hash is printed. A directory is
  listed through the hash of the .isolated file generated for it.
  """
  add_isolate_server_options(parser)
  add_archive_options(parser)
  opts, paths = parser.parse_args(args)
  process_isolate_server_options(parser, opts, True)
  try:
    archive(opts.isolate_server, opts.namespace, paths, opts.blacklist)
  except Error as e:
    parser.error(e.args[0])
  return 0
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002010
2011
def CMDdownload(parser, args):
  """Download data from the server.

  It can either download individual files or a complete tree from a .isolated
  file.
  """
  add_isolate_server_options(parser)
  parser.add_option(
      '-s', '--isolated', metavar='HASH',
      help='hash of an isolated file, .isolated file content is discarded, use '
           '--file if you need it')
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default='download',
      help='destination directory')
  add_cache_options(parser)
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported arguments: %s' % args)

  process_isolate_server_options(parser, options, True)
  # --isolated and --file are mutually exclusive, and exactly one is required.
  if bool(options.isolated) == bool(options.file):
    parser.error('Use one of --isolated or --file, and only one.')

  cache = process_cache_options(options)
  options.target = unicode(os.path.abspath(options.target))
  if options.isolated:
    # Refuse to extract a tree over an existing file or a non-empty directory.
    if (fs.isfile(options.target) or
        (fs.isdir(options.target) and fs.listdir(options.target))):
      parser.error(
          '--target \'%s\' exists, please use another target' % options.target)
  with get_storage(options.isolate_server, options.namespace) as storage:
    # Fetching individual files.
    if options.file:
      # TODO(maruel): Enable cache in this case too.
      channel = threading_utils.TaskChannel()
      # Maps digest -> destination path for fetches still in flight.
      pending = {}
      for digest, dest in options.file:
        pending[digest] = dest
        storage.async_fetch(
            channel,
            threading_utils.PRIORITY_MED,
            digest,
            UNKNOWN_FILE_SIZE,
            functools.partial(file_write, os.path.join(options.target, dest)))
      # Drain the channel until every requested digest has completed.
      while pending:
        fetched = channel.pull()
        dest = pending.pop(fetched)
        logging.info('%s: %s', fetched, dest)

    # Fetching whole isolated tree.
    if options.isolated:
      with cache:
        bundle = fetch_isolated(
            isolated_hash=options.isolated,
            storage=storage,
            cache=cache,
            outdir=options.target,
            require_command=False)
      if bundle.command:
        # NOTE(review): |rel| is already absolute (options.target was
        # abspath'd above), so the second join below returns |rel| unchanged;
        # the redundant expression is kept as-is in this doc-only change.
        rel = os.path.join(options.target, bundle.relative_cwd)
        print('To run this test please run from the directory %s:' %
              os.path.join(options.target, rel))
        print(' ' + ' '.join(bundle.command))

  return 0
2080
2081
def add_archive_options(parser):
  """Registers the upload filtering option (--blacklist) on |parser|."""
  parser.add_option(
      '--blacklist',
      default=list(DEFAULT_BLACKLIST), action='append',
      help='List of regexp to use as blacklist filter when uploading '
           'directories')
2088
2089
def add_isolate_server_options(parser):
  """Registers the server connection options (--isolate-server, --namespace)."""
  # The environment variable is only a fallback; an explicit flag wins.
  server_default = os.environ.get('ISOLATE_SERVER', '')
  parser.add_option(
      '-I', '--isolate-server',
      metavar='URL', default=server_default,
      help='URL of the Isolate Server to use. Defaults to the environment '
           'variable ISOLATE_SERVER if set. No need to specify https://, this '
           'is assumed.')
  parser.add_option(
      '--namespace', default='default-gzip',
      help='The namespace to use on the Isolate Server, default: %default')
2101
2102
def process_isolate_server_options(parser, options, set_exception_handler):
  """Validates and normalizes --isolate-server, aborting when missing or bad.

  Optionally installs the report-on-exception handler, then logs in.
  Returns the identity as determined by the server.
  """
  server = options.isolate_server
  if not server:
    parser.error('--isolate-server is required.')
  try:
    server = net.fix_url(server)
  except ValueError as exc:
    parser.error('--isolate-server %s' % exc)
  options.isolate_server = server
  if set_exception_handler:
    on_error.report_on_exception_exit(server)
  try:
    return auth.ensure_logged_in(server)
  except ValueError as exc:
    parser.error(str(exc))
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002120
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002121
def add_cache_options(parser):
  """Registers the local-cache tuning options on |parser| as an option group."""
  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache', metavar='DIR',
      help='Directory to keep a local cache of the files. Accelerates download '
           'by reusing already downloaded files. Default=%default')
  group.add_option(
      '--max-cache-size',
      type='int', metavar='NNN', default=50*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int', metavar='NNN', default=2*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int', metavar='NNN', default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(group)
2149
2150
def process_cache_options(options):
  """Builds the content cache matching the parsed cache options.

  Returns a DiskCache rooted at --cache when one was given, otherwise a
  MemoryCache.
  """
  if not options.cache:
    return MemoryCache()
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)
  # |options.cache| path may not exist until DiskCache() instance is created.
  return DiskCache(
      unicode(os.path.abspath(options.cache)),
      policies,
      isolated_format.get_hash_algo(options.namespace))
2163
2164
class OptionParserIsolateServer(logging_utils.OptionParserWithLogging):
  """Option parser with logging and authentication options preconfigured."""

  def __init__(self, **kwargs):
    # Pin the displayed program name and version on top of the base parser.
    logging_utils.OptionParserWithLogging.__init__(
        self,
        prog=os.path.basename(sys.modules[__name__].__file__),
        version=__version__,
        **kwargs)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    # Let the base class parse, then resolve the authentication options.
    opts, positional = logging_utils.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    auth.process_auth_options(self, opts)
    return opts, positional
2179
2180
def main(args):
  """Dispatches execution to the subcommand named in |args|."""
  return subcommand.CommandDispatcher(__name__).execute(
      OptionParserIsolateServer(), args)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00002184
2185
if __name__ == '__main__':
  # Must run before any output: normalizes stdout/stderr encoding handling.
  fix_encoding.fix_encoding()
  # Unbuffered output so progress is visible when piped.
  tools.disable_buffering()
  # colorama translates ANSI color escape sequences on Windows consoles.
  colorama.init()
  sys.exit(main(sys.argv[1:]))