blob: 6649bbe6f1418edbfb102f63136f4e77f83eab36 [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00005
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05006"""Archives a set of files or directories to a server."""
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00007
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05008__version__ = '0.3'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +000010import functools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000011import hashlib
maruel@chromium.org41601642013-09-18 19:40:46 +000012import json
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000013import logging
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000014import os
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000015import re
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050016import shutil
17import stat
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000018import sys
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050019import tempfile
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000020import threading
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000021import time
maruel@chromium.orge82112e2013-04-24 14:41:55 +000022import urllib
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000023import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000024
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000025from third_party import colorama
26from third_party.depot_tools import fix_encoding
27from third_party.depot_tools import subcommand
28
Marc-Antoine Ruel37989932013-11-19 16:28:08 -050029from utils import file_path
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000030from utils import net
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000031from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000032from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000033
34
# Version of isolate protocol passed to the server in /handshake request.
ISOLATE_PROTOCOL_VERSION = '1.0'


# The number of files to check the isolate server per /pre-upload query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: larger the file -> larger the
# possibility it has changed). Then first ITEMS_PER_CONTAINS_QUERIES[0] files
# are taken and send to '/pre-upload', then next ITEMS_PER_CONTAINS_QUERIES[1],
# and so on. Numbers here is a trade-off; the more per request, the lower the
# effect of HTTP round trip latency and TCP-level chattiness. On the other hand,
# larger values cause longer lookups, increasing the initial latency to start
# uploading, which is especially an issue for large files. This value is
# optimized for the "few thousands files to look up with minimal number of large
# files missing" case.
ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]


# A list of already compressed extension types that should not receive any
# compression before being uploaded.
# NOTE(review): entries are bare extensions with no leading dot, while
# os.path.splitext() returns extensions WITH the dot ('.zip') — callers must
# strip the dot before testing membership here.
ALREADY_COMPRESSED_TYPES = [
  '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'pdf', 'png',
  'wav', 'zip'
]


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# The size of each chunk to read when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# Chunk size to use when doing disk I/O.
DISK_FILE_CHUNK = 1024 * 1024

# Chunk size to use when reading from network stream.
NET_IO_FILE_CHUNK = 16 * 1024


# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout whole download will be aborted.
DOWNLOAD_READ_TIMEOUT = 60

# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30


# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
# specify the names here. Maps wire algorithm name -> hashlib constructor.
SUPPORTED_ALGOS = {
  'md5': hashlib.md5,
  'sha-1': hashlib.sha1,
  'sha-512': hashlib.sha512,
}


# Used for serialization: inverse of SUPPORTED_ALGOS, maps a hashlib
# constructor back to its wire name.
# NOTE(review): dict.iteritems() is Python 2 only, consistent with the rest of
# this file.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.iteritems())


# Regexps of paths that are not archived by default.
DEFAULT_BLACKLIST = (
  # Temporary vim or python files.
  r'^.+\.(?:pyc|swp)$',
  # .git or .svn directory.
  r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
)


# Chromium-specific.
DEFAULT_BLACKLIST += (
  r'^.+\.(?:run_test_cases)$',
  r'^(?:.+' + re.escape(os.path.sep) + r'|)testserver\.log$',
)
118
119
class Error(Exception):
  """Base class for generic run-time failures raised by this script."""
123
124
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be loaded or is malformed."""
128
129
class MappingError(OSError):
  """Raised when the file tree cannot be recreated on disk."""
133
134
def is_valid_hash(value, algo):
  """Checks whether |value| looks like a hex digest produced by |algo|.

  |algo| is a hashlib constructor; a valid digest is exactly twice
  digest_size hex characters (case insensitive).
  """
  expected_len = algo().digest_size * 2
  return re.match(r'^[a-fA-F0-9]{%d}$' % expected_len, value) is not None
139
140
def hash_file(filepath, algo):
  """Returns the hex digest of |filepath| computed with |algo|.

  |algo| should be one of hashlib hashing algorithm constructors. The file is
  streamed in DISK_FILE_CHUNK pieces so it is never fully loaded in memory.
  """
  hasher = algo()
  with open(filepath, 'rb') as f:
    # iter() with a sentinel keeps reading until the first empty chunk.
    for chunk in iter(lambda: f.read(DISK_FILE_CHUNK), b''):
      hasher.update(chunk)
  return hasher.hexdigest()
154
155
def stream_read(stream, chunk_size):
  """Yields successive |chunk_size| reads from |stream| until it is drained."""
  data = stream.read(chunk_size)
  while data:
    yield data
    data = stream.read(chunk_size)
163
164
def file_read(filepath, chunk_size=DISK_FILE_CHUNK):
  """Opens |filepath| and yields its content in pieces of |chunk_size| bytes."""
  with open(filepath, 'rb') as f:
    piece = f.read(chunk_size)
    while piece:
      yield piece
      piece = f.read(chunk_size)
173
174
def file_write(filepath, content_generator):
  """Writes the chunks produced by |content_generator| to |filepath|.

  Any missing intermediary directories are created first.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  parent_dir = os.path.dirname(filepath)
  if not os.path.isdir(parent_dir):
    os.makedirs(parent_dir)
  written = 0
  with open(filepath, 'wb') as out:
    for chunk in content_generator:
      written += len(chunk)
      out.write(chunk)
  return written
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000193
194
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000195def zip_compress(content_generator, level=7):
196 """Reads chunks from |content_generator| and yields zip compressed chunks."""
197 compressor = zlib.compressobj(level)
198 for chunk in content_generator:
199 compressed = compressor.compress(chunk)
200 if compressed:
201 yield compressed
202 tail = compressor.flush(zlib.Z_FINISH)
203 if tail:
204 yield tail
205
206
def zip_decompress(content_generator, chunk_size=DISK_FILE_CHUNK):
  """Yields decompressed data for the zlib stream read from |content_generator|.

  Decompression happens in pieces no larger than |chunk_size| so that a zip
  bomb cannot force zlib to preallocate a huge output buffer at once.

  Raises IOError if data is corrupted or incomplete.
  """
  inflater = zlib.decompressobj()
  bytes_seen = 0
  try:
    for piece in content_generator:
      bytes_seen += len(piece)
      out = inflater.decompress(piece, chunk_size)
      if out:
        yield out
      # Keep draining what did not fit into the size-capped output buffer.
      while inflater.unconsumed_tail:
        out = inflater.decompress(inflater.unconsumed_tail, chunk_size)
        if out:
          yield out
    leftover = inflater.flush()
    if leftover:
      yield leftover
  except zlib.error as e:
    raise IOError(
        'Corrupted zip stream (read %d bytes) - %s' % (bytes_seen, e))
  # Ensure all data was read and decompressed.
  if inflater.unused_data or inflater.unconsumed_tail:
    raise IOError('Not all data was decompressed')
236
237
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (store, no compression) for file types that are already
  compressed, and 7 for everything else.
  """
  # os.path.splitext() keeps the leading dot (e.g. '.zip') while
  # ALREADY_COMPRESSED_TYPES lists bare extensions (e.g. 'zip'). Strip the dot
  # so the membership test can actually match; previously it never did, so
  # already-compressed files were pointlessly recompressed at level 7.
  file_ext = os.path.splitext(filename)[1].lower().lstrip('.')
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
243
244
def create_directories(base_directory, files):
  """Creates under |base_directory| every directory needed by |files|."""
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Accumulate every ancestor directory of every file path.
  to_create = set()
  for filepath in files:
    ancestor = os.path.dirname(filepath)
    # Stop early: if an ancestor is already known, so is its whole chain.
    while ancestor and ancestor not in to_create:
      to_create.add(ancestor)
      ancestor = os.path.dirname(ancestor)
  # Lexicographic order guarantees a parent is created before its children.
  for directory in sorted(to_create):
    os.mkdir(os.path.join(base_directory, directory))
257
258
def create_symlinks(base_directory, files):
  """Creates every symlink described by the 'l' entries in |files|.

  |files| is an iterable of (relative path, properties dict) pairs; entries
  without an 'l' key are not symlinks and are skipped.
  """
  for relpath, properties in files:
    if 'l' not in properties:
      continue
    if sys.platform == 'win32':
      # TODO(maruel): Create symlink via the win32 api.
      logging.warning('Ignoring symlink %s', relpath)
      continue
    link_target = properties['l']
    destination = os.path.join(base_directory, relpath)
    # os.symlink() doesn't exist on Windows.
    os.symlink(link_target, destination)  # pylint: disable=E1101
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000271
272
def is_valid_file(filepath, size):
  """Checks that the file at |filepath| appears valid.

  Currently only the size is verified; passing UNKNOWN_FILE_SIZE means mere
  existence is enough.
  """
  if size == UNKNOWN_FILE_SIZE:
    return os.path.isfile(filepath)
  found_size = os.stat(filepath).st_size
  if found_size == size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), found_size, size)
  return False
287
288
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """Thread pool that automatically retries on IOError and runs a preconfigured
  function.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Number of times a task is retried (on IOError) before giving up.
  RETRIES = 5

  def __init__(self):
    # NOTE(review): arguments are positional per AutoRetryThreadPool.__init__;
    # the trailing 0 and 'remote' are presumably the queue size and the worker
    # thread name prefix — confirm against utils/threading_utils.py.
    super(WorkerPool, self).__init__(
        [IOError],
        self.RETRIES,
        self.INITIAL_WORKERS,
        self.MAX_WORKERS,
        0,
        'remote')
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000306
307
class Item(object):
  """An item to push to Storage.

  It starts its life in a main thread, travels to 'contains' thread, then to
  'push' thread and then finally back to the main thread. It is never used
  concurrently from multiple threads.
  """

  def __init__(self, digest, size, is_isolated=False):
    # Opaque state a StorageApi implementation may attach during push.
    self.push_state = None
    # Default zlib compression level; subclasses may choose a better one.
    self.compression_level = 6
    self.digest = digest
    self.size = size
    self.is_isolated = is_isolated

  def content(self, chunk_size):
    """Iterable with content of this item in chunks of given size.

    Arguments:
      chunk_size: preferred size of the chunk to produce, may be ignored.
    """
    raise NotImplementedError()
331
332
class FileItem(Item):
  """A file on disk to push to Storage."""

  def __init__(self, path, digest, size, is_isolated):
    super(FileItem, self).__init__(digest, size, is_isolated)
    self.path = path
    # Already-compressed formats (zip, jpg, ...) are stored, not recompressed.
    self.compression_level = get_zip_compression_level(path)

  def content(self, chunk_size):
    """Yields the file's bytes in |chunk_size| pieces."""
    return file_read(self.path, chunk_size)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000343
344
class BufferItem(Item):
  """An in-memory byte buffer to push to Storage."""

  def __init__(self, buf, algo, is_isolated=False):
    # The digest is computed eagerly from the buffer with |algo|.
    super(BufferItem, self).__init__(
        algo(buf).hexdigest(), len(buf), is_isolated)
    self.buffer = buf

  def content(self, _chunk_size):
    # The whole buffer is produced as a single chunk; chunk size is ignored.
    return [self.buffer]
355
356
class Storage(object):
  """Efficiently downloads or uploads large set of files via StorageApi.

  Owns two lazily-created thread pools: one for CPU-bound zipping and one for
  network I/O (with automatic retry on IOError). Also usable as a context
  manager that joins both pools on exit.
  """

  def __init__(self, storage_api, use_zip):
    # storage_api: StorageApi instance performing the raw fetch/push/contains.
    # use_zip: if True, content is zlib-compressed before push and decompressed
    # after fetch.
    self.use_zip = use_zip
    self._storage_api = storage_api
    self._cpu_thread_pool = None
    self._net_thread_pool = None

  @property
  def cpu_thread_pool(self):
    """ThreadPool for CPU-bound tasks like zipping (created on first use)."""
    if self._cpu_thread_pool is None:
      self._cpu_thread_pool = threading_utils.ThreadPool(
          2, max(threading_utils.num_processors(), 2), 0, 'zip')
    return self._cpu_thread_pool

  @property
  def net_thread_pool(self):
    """AutoRetryThreadPool for IO-bound tasks, retries IOError."""
    if self._net_thread_pool is None:
      self._net_thread_pool = WorkerPool()
    return self._net_thread_pool

  def close(self):
    """Waits for all pending tasks to finish, then shuts both pools down."""
    if self._cpu_thread_pool:
      self._cpu_thread_pool.join()
      self._cpu_thread_pool.close()
      self._cpu_thread_pool = None
    if self._net_thread_pool:
      self._net_thread_pool.join()
      self._net_thread_pool.close()
      self._net_thread_pool = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exc_value, _traceback):
    """Context manager interface. Never swallows exceptions."""
    self.close()
    return False

  def upload_tree(self, indir, infiles):
    """Uploads the given tree to the isolate server.

    Arguments:
      indir: root directory the infiles are based in.
      infiles: dict of files to upload from |indir|: relative path ->
          metadata dict with 'h' (digest), 's' (size), optional 'priority'
          and 'l' (symlink target).

    Returns:
      List of items that were uploaded. All other items are already there.
    """
    logging.info('upload tree(indir=%s, files=%d)', indir, len(infiles))

    # Convert |indir| + |infiles| into a list of FileItem objects.
    # Filter out symlinks, since they are not represented by items on isolate
    # server side.
    # NOTE(review): iteritems() is Python 2 only.
    items = [
        FileItem(
            path=os.path.join(indir, filepath),
            digest=metadata['h'],
            size=metadata['s'],
            is_isolated=metadata.get('priority') == '0')
        for filepath, metadata in infiles.iteritems()
        if 'l' not in metadata
    ]

    return self.upload_items(items)

  def upload_items(self, items):
    """Uploads bunch of items to the isolate server.

    Will upload only items that are missing.

    Arguments:
      items: list of Item instances that represents data to upload.

    Returns:
      List of items that were uploaded. All other items are already there.
    """
    # TODO(vadimsh): Optimize special case of len(items) == 1 that is frequently
    # used by swarming.py. There's no need to spawn multiple threads and try to
    # do stuff in parallel: there's nothing to parallelize. 'contains' check and
    # 'push' should be performed sequentially in the context of current thread.

    # For each digest keep only first Item that matches it. All other items
    # are just indistinguishable copies from the point of view of isolate
    # server (it doesn't care about paths at all, only content and digests).
    seen = {}
    duplicates = 0
    for item in items:
      # setdefault returns the already-stored item when the digest was seen.
      if seen.setdefault(item.digest, item) is not item:
        duplicates += 1
    items = seen.values()
    if duplicates:
      logging.info('Skipped %d duplicated files', duplicates)

    # Enqueue all upload tasks.
    missing = set()
    channel = threading_utils.TaskChannel()
    for missing_item in self.get_missing_items(items):
      missing.add(missing_item)
      # .isolated files unblock the rest of the flow, so push them first.
      self.async_push(
          channel,
          WorkerPool.HIGH if missing_item.is_isolated else WorkerPool.MED,
          missing_item)

    uploaded = []
    # No need to spawn deadlock detector thread if there's nothing to upload.
    if missing:
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        # Wait for all started uploads to finish.
        while len(uploaded) != len(missing):
          detector.ping()
          item = channel.pull()
          uploaded.append(item)
          logging.debug(
              'Uploaded %d / %d: %s', len(uploaded), len(missing), item.digest)
      logging.info('All files are uploaded')

    # Print stats.
    # NOTE(review): if |items| is empty, total == 0 and the percentage
    # computations below raise ZeroDivisionError — confirm callers never pass
    # an empty list, or guard like the total_size divisions are.
    total = len(items)
    total_size = sum(f.size for f in items)
    logging.info(
        'Total: %6d, %9.1fkb',
        total,
        total_size / 1024.)
    cache_hit = set(items) - missing
    cache_hit_size = sum(f.size for f in cache_hit)
    logging.info(
        'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_hit),
        cache_hit_size / 1024.,
        len(cache_hit) * 100. / total,
        cache_hit_size * 100. / total_size if total_size else 0)
    cache_miss = missing
    cache_miss_size = sum(f.size for f in cache_miss)
    logging.info(
        'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_miss),
        cache_miss_size / 1024.,
        len(cache_miss) * 100. / total,
        cache_miss_size * 100. / total_size if total_size else 0)

    return uploaded

  def get_fetch_url(self, digest):
    """Returns an URL that can be used to fetch an item with given digest.

    Arguments:
      digest: hex digest of item to fetch.

    Returns:
      An URL or None if underlying protocol doesn't support this.
    """
    return self._storage_api.get_fetch_url(digest)

  def async_push(self, channel, priority, item):
    """Starts asynchronous push to the server in a parallel thread.

    Arguments:
      channel: TaskChannel that receives back |item| when upload ends.
      priority: thread pool task priority for the push.
      item: item to upload as instance of Item class.
    """
    def push(content):
      """Pushes an item and returns its id, to pass as a result to |channel|."""
      self._storage_api.push(item, content)
      return item

    # If zipping is not required, just start a push task.
    if not self.use_zip:
      self.net_thread_pool.add_task_with_channel(channel, priority, push,
          item.content(DISK_FILE_CHUNK))
      return

    # If zipping is enabled, zip in a separate thread.
    def zip_and_push():
      # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
      # content right here. It will block until all file is zipped.
      try:
        stream = zip_compress(item.content(ZIPPED_FILE_CHUNK),
                              item.compression_level)
        data = ''.join(stream)
      except Exception as exc:
        logging.error('Failed to zip \'%s\': %s', item, exc)
        # Propagate the failure to whoever is pulling from |channel|.
        channel.send_exception()
        return
      # Zipping is done; hand the actual network push to the net pool.
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, [data])
    self.cpu_thread_pool.add_task(priority, zip_and_push)

  def async_fetch(self, channel, priority, digest, size, sink):
    """Starts asynchronous fetch from the server in a parallel thread.

    Arguments:
      channel: TaskChannel that receives back |digest| when download ends.
      priority: thread pool task priority for the fetch.
      digest: hex digest of an item to download.
      size: expected size of the item (after decompression).
      sink: function that will be called as sink(generator).
    """
    def fetch():
      try:
        # Prepare reading pipeline.
        stream = self._storage_api.fetch(digest)
        if self.use_zip:
          stream = zip_decompress(stream, DISK_FILE_CHUNK)
        # Run |stream| through verifier that will assert its size.
        verifier = FetchStreamVerifier(stream, size)
        # Verified stream goes to |sink|.
        sink(verifier.run())
      except Exception as err:
        logging.error('Failed to fetch %s: %s', digest, err)
        # Re-raise so the WorkerPool retry logic / channel sees the failure.
        raise
      return digest

    # Don't bother with zip_thread_pool for decompression. Decompression is
    # really fast and most probably IO bound anyway.
    self.net_thread_pool.add_task_with_channel(channel, priority, fetch)

  def get_missing_items(self, items):
    """Yields items that are missing from the server.

    Issues multiple parallel queries via StorageApi's 'contains' method.

    Arguments:
      items: a list of Item objects to check.

    Yields:
      Item objects that are missing from the server.
    """
    channel = threading_utils.TaskChannel()
    pending = 0
    # Enqueue all requests.
    for batch in self.batch_items_for_check(items):
      self.net_thread_pool.add_task_with_channel(channel, WorkerPool.HIGH,
          self._storage_api.contains, batch)
      pending += 1
    # Yield results as they come in. Each pull() returns one batch's worth of
    # missing items. NOTE(review): xrange() is Python 2 only.
    for _ in xrange(pending):
      for missing in channel.pull():
        yield missing

  @staticmethod
  def batch_items_for_check(items):
    """Splits list of items to check for existence on the server into batches.

    Each batch corresponds to a single 'exists?' query to the server via a call
    to StorageApi's 'contains' method.

    Arguments:
      items: a list of Item objects.

    Yields:
      Batches of items to query for existence in a single operation,
      each batch is a list of Item objects.
    """
    batch_count = 0
    batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[0]
    next_queries = []
    # Largest files first: size is used as a proxy for "likely changed", so
    # likely-missing items get queried (and thus uploaded) earlier.
    for item in sorted(items, key=lambda x: x.size, reverse=True):
      next_queries.append(item)
      if len(next_queries) == batch_size_limit:
        yield next_queries
        next_queries = []
        batch_count += 1
        # Later batches may grow, per the ITEMS_PER_CONTAINS_QUERIES schedule;
        # the last entry is reused once the schedule runs out.
        batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[
            min(batch_count, len(ITEMS_PER_CONTAINS_QUERIES) - 1)]
    if next_queries:
      yield next_queries
630
631
class FetchQueue(object):
  """Fetches items from Storage and places them into LocalCache.

  It manages multiple concurrent fetch operations. Acts as a bridge between
  Storage and LocalCache so that Storage and LocalCache don't depend on each
  other at all.
  """

  def __init__(self, storage, cache):
    # storage: Storage instance used to start asynchronous fetches.
    # cache: LocalCache-like object; must provide cached_set(), touch(),
    # evict() and write().
    self.storage = storage
    self.cache = cache
    self._channel = threading_utils.TaskChannel()
    # Digests currently being downloaded.
    self._pending = set()
    # Every digest ever requested through add(); used by verify_all_cached().
    self._accessed = set()
    # Digests already present in the cache.
    self._fetched = cache.cached_set()

  def add(self, priority, digest, size=UNKNOWN_FILE_SIZE):
    """Starts asynchronous fetch of item |digest|.

    No-op if the item is already being fetched or is valid in the cache.
    """
    # Fetching it now?
    if digest in self._pending:
      return

    # Mark this file as in use, verify_all_cached will later ensure it is still
    # in cache.
    self._accessed.add(digest)

    # Already fetched? Notify cache to update item's LRU position.
    if digest in self._fetched:
      # 'touch' returns True if item is in cache and not corrupted.
      if self.cache.touch(digest, size):
        return
      # Item is corrupted, remove it from cache and fetch it again.
      self._fetched.remove(digest)
      self.cache.evict(digest)

    # TODO(maruel): It should look at the free disk space, the current cache
    # size and the size of the new item on every new item:
    # - Trim the cache as more entries are listed when free disk space is low,
    #   otherwise if the amount of data downloaded during the run > free disk
    #   space, it'll crash.
    # - Make sure there's enough free disk space to fit all dependencies of
    #   this run! If not, abort early.

    # Start fetching. The fetched stream is piped straight into the cache via
    # cache.write(digest, <generator>).
    self._pending.add(digest)
    self.storage.async_fetch(
        self._channel, priority, digest, size,
        functools.partial(self.cache.write, digest))

  def wait(self, digests):
    """Starts a loop that waits for at least one of |digests| to be retrieved.

    Returns the first digest retrieved. All |digests| must have been passed to
    add() beforehand (asserted below).
    """
    # Flush any already fetched items.
    for digest in digests:
      if digest in self._fetched:
        return digest

    # Ensure all requested items are being fetched now.
    assert all(digest in self._pending for digest in digests), (
        digests, self._pending)

    # Wait for some requested item to finish fetching.
    while self._pending:
      digest = self._channel.pull()
      self._pending.remove(digest)
      self._fetched.add(digest)
      if digest in digests:
        return digest

    # Should never reach this point due to assert above.
    raise RuntimeError('Impossible state')

  def inject_local_file(self, path, algo):
    """Adds local file to the cache as if it was fetched from storage.

    |algo| is a hashlib constructor used to compute the digest. The whole file
    is read into memory. Returns the digest.
    """
    with open(path, 'rb') as f:
      data = f.read()
    digest = algo(data).hexdigest()
    self.cache.write(digest, [data])
    self._fetched.add(digest)
    return digest

  @property
  def pending_count(self):
    """Returns number of items to be fetched."""
    return len(self._pending)

  def verify_all_cached(self):
    """True if all accessed items are in cache."""
    return self._accessed.issubset(self.cache.cached_set())
723
724
class FetchStreamVerifier(object):
  """Verifies that fetched file is valid before passing it to the LocalCache."""

  def __init__(self, stream, expected_size):
    # |stream| is an iterable of str chunks, as produced by StorageApi.fetch.
    self.stream = stream
    # Total expected size in bytes, or UNKNOWN_FILE_SIZE to skip the check.
    self.expected_size = expected_size
    # Number of bytes seen so far across all inspected chunks.
    self.current_size = 0

  def run(self):
    """Generator that yields same items as |stream|.

    Verifies |stream| is complete before yielding a last chunk to consumer.

    Also wraps IOError produced by consumer into MappingError exceptions since
    otherwise Storage will retry fetch on unrelated local cache errors.
    """
    # Read one chunk ahead, keep it in |stored|.
    # That way a complete stream can be verified before pushing last chunk
    # to consumer.
    stored = None
    for chunk in self.stream:
      assert chunk is not None
      if stored is not None:
        self._inspect_chunk(stored, is_last=False)
        try:
          yield stored
        except IOError as exc:
          raise MappingError('Failed to store an item in cache: %s' % exc)
      stored = chunk
    if stored is not None:
      self._inspect_chunk(stored, is_last=True)
      try:
        yield stored
      except IOError as exc:
        raise MappingError('Failed to store an item in cache: %s' % exc)
    else:
      # The stream yielded no chunks at all. Still run the final size check so
      # a download truncated to nothing doesn't pass as valid: an empty stream
      # is only correct when 0 (or an unknown number of) bytes were expected.
      self._inspect_chunk('', is_last=True)

  def _inspect_chunk(self, chunk, is_last):
    """Called for each fetched chunk before passing it to consumer.

    Raises IOError when the completed stream doesn't match |expected_size|.
    """
    self.current_size += len(chunk)
    if (is_last and (self.expected_size != UNKNOWN_FILE_SIZE) and
        (self.expected_size != self.current_size)):
      raise IOError('Incorrect file size: expected %d, got %d' % (
          self.expected_size, self.current_size))
768
769
class StorageApi(object):
  """Base class describing the low-level storage protocol.

  Concrete subclasses talk to a particular backend (Isolate Server, a mounted
  file share, ...) and must implement every method below.
  """

  def get_fetch_url(self, digest):
    """Maps a digest to a directly fetchable URL.

    Arguments:
      digest: hex digest of item to fetch.

    Returns:
      An URL string, or None when the backend provides no URL based access.
    """
    raise NotImplementedError()

  def fetch(self, digest):
    """Retrieves an item's content from the backend.

    Arguments:
      digest: hash digest of item to download.

    Yields:
      Chunks of the item content, as str objects.
    """
    raise NotImplementedError()

  def push(self, item, content):
    """Sends the content of an item to the backend.

    Arguments:
      item: Item object describing what is being uploaded.
      content: generator yielding str chunks to upload.

    Returns:
      None.
    """
    raise NotImplementedError()

  def contains(self, items):
    """Determines which of |items| are missing from the backend.

    Mutates |items|: an opaque implementation specific object is assigned to
    the push_state attribute of each entry missing in the datastore.

    Arguments:
      items: list of Item objects.

    Returns:
      The subset of |items| not present on the server, as a list of Item
      objects.
    """
    raise NotImplementedError()
820
821
class IsolateServer(StorageApi):
  """StorageApi implementation that downloads and uploads to Isolate Server.

  It uploads and downloads directly from Google Storage whenever appropriate.
  """

  class _PushState(object):
    """State needed to call .push(), to be stored in Item.push_state."""
    def __init__(self, upload_url, finalize_url):
      # URL to PUT the item's raw content to.
      self.upload_url = upload_url
      # URL to POST to once the PUT succeeded; may be empty/None when the
      # server requires no finalization for this item (see push()).
      self.finalize_url = finalize_url
      # True once the content PUT succeeded; lets a retried push skip the
      # (possibly large) re-upload and only redo finalization.
      self.uploaded = False
      # True once the finalize POST succeeded.
      self.finalized = False

  def __init__(self, base_url, namespace):
    super(IsolateServer, self).__init__()
    assert base_url.startswith('http'), base_url
    self.base_url = base_url.rstrip('/')
    self.namespace = namespace
    # Guards lazy initialization of |_server_caps| in _server_capabilities.
    self._lock = threading.Lock()
    # Memoized /handshake response; None until the first handshake.
    self._server_caps = None

  @staticmethod
  def _generate_handshake_request():
    """Returns a dict to be sent as handshake request body."""
    # TODO(vadimsh): Set 'pusher' and 'fetcher' according to intended usage.
    return {
      'client_app_version': __version__,
      'fetcher': True,
      'protocol_version': ISOLATE_PROTOCOL_VERSION,
      'pusher': True,
    }

  @staticmethod
  def _validate_handshake_response(caps):
    """Validates and normalizes handshake response."""
    logging.info('Protocol version: %s', caps['protocol_version'])
    logging.info('Server version: %s', caps['server_app_version'])
    if caps.get('error'):
      raise MappingError(caps['error'])
    if not caps['access_token']:
      raise ValueError('access_token is missing')
    return caps

  @property
  def _server_capabilities(self):
    """Performs handshake with the server if not yet done.

    Returns:
      Server capabilities dictionary as returned by /handshake endpoint.

    Raises:
      MappingError if server rejects the handshake.
    """
    # TODO(maruel): Make this request much earlier asynchronously while the
    # files are being enumerated.
    with self._lock:
      if self._server_caps is None:
        request_body = json.dumps(
            self._generate_handshake_request(), separators=(',', ':'))
        response = net.url_read(
            url=self.base_url + '/content-gs/handshake',
            data=request_body,
            content_type='application/json',
            method='POST')
        if response is None:
          raise MappingError('Failed to perform handshake.')
        try:
          caps = json.loads(response)
          if not isinstance(caps, dict):
            raise ValueError('Expecting JSON dict')
          self._server_caps = self._validate_handshake_response(caps)
        except (ValueError, KeyError, TypeError) as exc:
          # KeyError exception has very confusing str conversion: it's just a
          # missing key value and nothing else. So print exception class name
          # as well.
          raise MappingError('Invalid handshake response (%s): %s' % (
              exc.__class__.__name__, exc))
      return self._server_caps

  def get_fetch_url(self, digest):
    # See StorageApi.get_fetch_url for the contract.
    assert isinstance(digest, basestring)
    return '%s/content-gs/retrieve/%s/%s' % (
        self.base_url, self.namespace, digest)

  def fetch(self, digest):
    # See StorageApi.fetch for the contract.
    source_url = self.get_fetch_url(digest)
    logging.debug('download_file(%s)', source_url)

    # Because the app engine DB is only eventually consistent, retry 404 errors
    # because the file might just not be visible yet (even though it has been
    # uploaded).
    connection = net.url_open(
        source_url, retry_404=True, read_timeout=DOWNLOAD_READ_TIMEOUT)
    if not connection:
      raise IOError('Unable to open connection to %s' % source_url)
    return stream_read(connection, NET_IO_FILE_CHUNK)

  def push(self, item, content):
    # See StorageApi.push for the contract. contains() must have been called
    # first: it is what attaches the _PushState with the upload URLs.
    assert isinstance(item, Item)
    assert isinstance(item.push_state, IsolateServer._PushState)
    assert not item.push_state.finalized

    # TODO(vadimsh): Do not read from |content| generator when retrying push.
    # If |content| is indeed a generator, it can not be re-winded back
    # to the beginning of the stream. A retry will find it exhausted. A possible
    # solution is to wrap |content| generator with some sort of caching
    # restartable generator. It should be done alongside streaming support
    # implementation.

    # This push operation may be a retry after failed finalization call below,
    # no need to reupload contents in that case.
    if not item.push_state.uploaded:
      # A cheezy way to avoid memcpy of (possibly huge) file, until streaming
      # upload support is implemented.
      if isinstance(content, list) and len(content) == 1:
        content = content[0]
      else:
        content = ''.join(content)
      # PUT file to |upload_url|.
      response = net.url_read(
          url=item.push_state.upload_url,
          data=content,
          content_type='application/octet-stream',
          method='PUT')
      if response is None:
        raise IOError('Failed to upload a file %s to %s' % (
            item.digest, item.push_state.upload_url))
      item.push_state.uploaded = True
    else:
      logging.info(
          'A file %s already uploaded, retrying finalization only', item.digest)

    # Optionally notify the server that it's done.
    if item.push_state.finalize_url:
      # TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
      # send it to isolated server. That way isolate server can verify that
      # the data safely reached Google Storage (GS provides MD5 and CRC32C of
      # stored files).
      response = net.url_read(
          url=item.push_state.finalize_url,
          data='',
          content_type='application/json',
          method='POST')
      if response is None:
        raise IOError('Failed to finalize an upload of %s' % item.digest)
      item.push_state.finalized = True

  def contains(self, items):
    # See StorageApi.contains for the contract.
    logging.info('Checking existence of %d files...', len(items))

    # Request body is a json encoded list of dicts.
    body = [
        {
          'h': item.digest,
          's': item.size,
          'i': int(item.is_isolated),
        } for item in items
    ]

    query_url = '%s/content-gs/pre-upload/%s?token=%s' % (
        self.base_url,
        self.namespace,
        urllib.quote(self._server_capabilities['access_token']))
    response_body = net.url_read(
        url=query_url,
        data=json.dumps(body, separators=(',', ':')),
        content_type='application/json',
        method='POST')
    if response_body is None:
      raise MappingError('Failed to execute /pre-upload query')

    # Response body is a list of push_urls (or null if file is already present).
    try:
      response = json.loads(response_body)
      if not isinstance(response, list):
        raise ValueError('Expecting response with json-encoded list')
      if len(response) != len(items):
        raise ValueError(
            'Incorrect number of items in the list, expected %d, '
            'but got %d' % (len(items), len(response)))
    except ValueError as err:
      raise MappingError(
          'Invalid response from server: %s, body is %s' % (err, response_body))

    # Pick Items that are missing, attach _PushState to them.
    missing_items = []
    for i, push_urls in enumerate(response):
      if push_urls:
        # Expected to be an (upload_url, finalize_url) pair.
        assert len(push_urls) == 2, str(push_urls)
        item = items[i]
        assert item.push_state is None
        item.push_state = IsolateServer._PushState(push_urls[0], push_urls[1])
        missing_items.append(item)
    logging.info('Queried %d files, %d cache hit',
        len(items), len(items) - len(missing_items))
    return missing_items
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001019
1020
class FileSystem(StorageApi):
  """StorageApi backed by a plain directory on disk.

  Typically |base_path| lives on a locally mounted NFS/CIFS file server, so
  'remote' items are simply files inside that directory.
  """

  def __init__(self, base_path):
    super(FileSystem, self).__init__()
    self.base_path = base_path

  def _path(self, digest):
    """Returns the path of the file that stores the item with |digest|."""
    return os.path.join(self.base_path, digest)

  def get_fetch_url(self, digest):
    # No URL based access for plain files.
    return None

  def fetch(self, digest):
    assert isinstance(digest, basestring)
    return file_read(self._path(digest))

  def push(self, item, content):
    assert isinstance(item, Item)
    file_write(self._path(item.digest), content)

  def contains(self, items):
    missing = []
    for item in items:
      if not os.path.exists(self._path(item.digest)):
        missing.append(item)
    return missing
1048
1049
class LocalCache(object):
  """Store on the local machine for objects fetched via Storage.

  Instances may be hit from several threads at once and are responsible for
  guarding their internal state with a lock.
  """

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exc_value, _traceback):
    """Context manager interface."""
    # Exceptions are never swallowed.
    return False

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Checks an item for corruption and refreshes its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Removes item from cache if it's there."""
    raise NotImplementedError()

  def read(self, digest):
    """Returns contents of the cached item as a single str."""
    raise NotImplementedError()

  def write(self, digest, content):
    """Consumes the |content| chunk generator and stores it in the cache."""
    raise NotImplementedError()

  def hardlink(self, digest, dest, file_mode):
    """Ensures file at |dest| has same content as cached |digest|.

    If file_mode is provided, it is used to set the executable bit if
    applicable.
    """
    raise NotImplementedError()
1100
1101
1102class MemoryCache(LocalCache):
1103 """LocalCache implementation that stores everything in memory."""
1104
1105 def __init__(self):
1106 super(MemoryCache, self).__init__()
1107 # Let's not assume dict is thread safe.
1108 self._lock = threading.Lock()
1109 self._contents = {}
1110
1111 def cached_set(self):
1112 with self._lock:
1113 return set(self._contents)
1114
1115 def touch(self, digest, size):
1116 with self._lock:
1117 return digest in self._contents
1118
1119 def evict(self, digest):
1120 with self._lock:
1121 self._contents.pop(digest, None)
1122
1123 def read(self, digest):
1124 with self._lock:
1125 return self._contents[digest]
1126
1127 def write(self, digest, content):
1128 # Assemble whole stream before taking the lock.
1129 data = ''.join(content)
1130 with self._lock:
1131 self._contents[digest] = data
1132
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001133 def hardlink(self, digest, dest, file_mode):
1134 """Since data is kept in memory, there is no filenode to hardlink."""
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001135 file_write(dest, [self.read(digest)])
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001136 if file_mode is not None:
1137 # Ignores all other bits.
1138 os.chmod(dest, file_mode & 0500)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001139
1140
def get_hash_algo(_namespace):
  """Returns the hashing algorithm class to use for the given |namespace|.

  Every namespace currently hashes with SHA-1; per-namespace selection is not
  implemented yet.
  """
  # TODO(vadimsh): Implement this at some point.
  return hashlib.sha1
1145
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001146
def is_namespace_with_compression(namespace):
  """Returns True if given |namespace| stores compressed objects."""
  # Compressed namespaces are identified purely by their name suffix.
  return namespace.endswith('-gzip') or namespace.endswith('-deflate')
1150
1151
def get_storage_api(file_or_url, namespace):
  """Instantiates the right StorageApi implementation for |file_or_url|.

  An URL gets an IsolateServer client; anything else is treated as a local
  directory path served by FileSystem.
  """
  if not file_path.is_url(file_or_url):
    return FileSystem(file_or_url)
  return IsolateServer(file_or_url, namespace)
1158
1159
def get_storage(file_or_url, namespace):
  """Returns a Storage wrapping the right StorageApi for |file_or_url|.

  Compression is enabled when |namespace| requests it.
  """
  api = get_storage_api(file_or_url, namespace)
  return Storage(api, is_namespace_with_compression(namespace))
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001165
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001166
def expand_symlinks(indir, relfile):
  """Follows symlinks in |relfile|, but treating symlinks that point outside the
  build tree as if they were ordinary directories/files. Returns the final
  symlink-free target and a list of paths to symlinks encountered in the
  process.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory we could support
  that case.
  """
  # Remember whether the caller referred to a directory so the trailing
  # separator can be restored on the returned path.
  is_directory = relfile.endswith(os.path.sep)
  # |done| is the already-resolved prefix, |todo| the part still to resolve.
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
        done, todo)
    if not symlink:
      # No symlink left in |todo|: normalize the path case and finish.
      todo = file_path.fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break
    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    target = os.path.normpath(os.path.join(done, pre_symlink))
    symlink_target = os.readlink(symlink_path)
    if os.path.isabs(symlink_target):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = file_path.fix_native_path_case(target, symlink_target)

    if not os.path.exists(target):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not file_path.path_starts_with(indir, target):
      # Symlink points outside the build tree: treat it as an ordinary
      # directory/file and keep walking past it without recording it.
      done = symlink_path
      todo = post_symlink
      continue
    if file_path.path_starts_with(target, symlink_path):
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))
    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    target = target.split(os.path.sep)
    symlink_path = symlink_path.split(os.path.sep)
    prefix_length = 0
    for target_piece, symlink_path_piece in zip(target, symlink_path):
      if target_piece == symlink_path_piece:
        prefix_length += 1
      else:
        break
    done = os.path.sep.join(target[:prefix_length])
    todo = os.path.join(
        os.path.sep.join(target[prefix_length:]), post_symlink)

  relfile = os.path.relpath(done, indir)
  # Restore the trailing separator when the original |relfile| had one.
  relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
  return relfile, symlinks
1237
1238
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: root directory; all results are relative to it.
    relfile: relative path to expand; a trailing os.path.sep marks a directory.
    blacklist: optional callable returning True for relative paths to skip;
        only consulted for entries discovered while walking a directory.
    follow_symlinks: True to resolve symlinks through expand_symlinks().

  Returns:
    List of relative paths: symlinks encountered plus the actual files.

  Raises:
    MappingError on absolute, out-of-tree, missing or wrong-case paths.
  """
  if os.path.isabs(relfile):
    raise MappingError('Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + '.' + os.path.sep:
      # Give up enforcing strict path case on OSX. Really, it's that sad. The
      # case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      # ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree. We
      # have no idea why.
      if sys.platform != 'darwin':
        raise MappingError(
            'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    relfile, symlinks = expand_symlinks(indir, relfile)

  if relfile.endswith(os.path.sep):
    # A trailing separator means a directory: recurse into its content.
    if not os.path.isdir(infile):
      raise MappingError(
          '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith('.' + os.path.sep):
      relfile = relfile[2:]
    outfiles = symlinks
    try:
      for filename in os.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if os.path.isdir(os.path.join(indir, inner_relfile)):
          inner_relfile += os.path.sep
        outfiles.extend(
            expand_directory_and_symlink(indir, inner_relfile, blacklist,
                                         follow_symlinks))
      return outfiles
    except OSError as e:
      raise MappingError(
          'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise MappingError(
          'Input directory %s must have a trailing slash' % infile)

    if not os.path.isfile(infile):
      raise MappingError('Input file %s doesn\'t exist' % infile)

    return symlinks + [relfile]
1316
1317
def process_input(filepath, prevdict, read_only, flavor, algo):
  """Processes an input file, a dependency, and return meta data about it.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calcultate the SHA-1 of the file's
    content if the path points to a file and not a symlink.

  Arguments:
    filepath: File to act on.
    prevdict: the previous dictionary. It is used to retrieve the cached sha-1
              to skip recalculating the hash. Optional.
    read_only: If True, the file mode is manipulated. In practice, only save
               one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
               windows, mode is not set since all files are 'executable' by
               default.
    flavor: One isolated flavor, like 'linux', 'mac' or 'win'.
    algo: Hashing algorithm used.

  Returns:
    The necessary data to create a entry in the 'files' section of an .isolated
    file.
  """
  out = {}
  # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
  # if prevdict.get('T') == True:
  #   # The file's content is ignored. Skip the time and hard code mode.
  #   if get_flavor() != 'win':
  #     out['m'] = stat.S_IRUSR | stat.S_IRGRP
  #   out['s'] = 0
  #   out['h'] = algo().hexdigest()
  #   out['T'] = True
  #   return out

  # Always check the file stat and check if it is a link. The timestamp is used
  # to know if the file's content/symlink destination should be looked into.
  # E.g. only reuse from prevdict if the timestamp hasn't changed.
  # There is the risk of the file's timestamp being reset to its last value
  # manually while its content changed. We don't protect against that use case.
  try:
    # lstat (not stat) so symlinks are described, not followed.
    filestats = os.lstat(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(filestats.st_mode)

  if flavor != 'win':
    # Ignore file mode on Windows since it's not really useful there.
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      filemode &= ~stat.S_IWUSR
    # Keep group execute permission in sync with the owner's, so only the 4
    # modes documented above can be produced.
    if filemode & stat.S_IXUSR:
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    if not is_link:
      out['m'] = filemode

  # Used to skip recalculating the hash or link destination. Use the most recent
  # update time.
  # TODO(maruel): Save it in the .state file instead of .isolated so the
  # .isolated file is deterministic.
  out['t'] = int(round(filestats.st_mtime))

  if not is_link:
    out['s'] = filestats.st_size
    # If the timestamp wasn't updated and the file size is still the same, carry
    # on the sha-1.
    if (prevdict.get('t') == out['t'] and
        prevdict.get('s') == out['s']):
      # Reuse the previous hash if available.
      out['h'] = prevdict.get('h')
    if not out.get('h'):
      out['h'] = hash_file(filepath, algo)
  else:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      # Reuse the previous link destination if available.
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do again
      # here.
      symlink_value = os.readlink(filepath)  # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = file_path.fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  return out
1410
1411
def save_isolated(isolated, data):
  """Serializes |data| into the .isolated file at path |isolated|.

  Note: this reference implementation never splits content into child .isolated
  files, so the returned list of included children is always empty.

  Returns the list of child isolated files that are included by |isolated|.
  """
  # Round-trip |data| through load_isolated() to validate it before writing.
  hashing_algo = SUPPORTED_ALGOS[data['algo']]
  load_isolated(json.dumps(data), data.get('flavor'), hashing_algo)
  tools.write_json(isolated, data, True)
  return []
1425
1426
1427
def upload_tree(base_url, indir, infiles, namespace):
  """Uploads the given tree to the given url.

  Arguments:
    base_url: The base url, it is assume that |base_url|/has/ can be used to
              query if an element was already uploaded, and |base_url|/store/
              can be used to upload a new element.
    indir: Root directory the infiles are based in.
    infiles: dict of files to upload from |indir| to |base_url|.
    namespace: The namespace to use on the server.

  Returns:
    0 on success; failures are reported through exceptions.
  """
  storage = get_storage(base_url, namespace)
  with storage:
    storage.upload_tree(indir, infiles)
  return 0
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001442
1443
def load_isolated(content, os_flavor, algo):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  Arguments:
  - content: raw serialized content to load.
  - os_flavor: OS to load this file on. Optional; when set, the file's 'os'
    key must match it.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
    algorithm used on the Isolate Server. May be None, in which case the
    algorithm named by the file itself (default 'sha-1') is used.

  Returns:
    The decoded dict, with file paths and symlink targets converted to the
    native os.path.sep.

  Raises:
    ConfigError if the content is not valid .isolated data.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  # TODO(maruel): Drop support for unversioned .isolated file around Jan 2014.
  value = data.get('version', '1.0')
  if not isinstance(value, basestring):
    raise ConfigError('Expected string, got %r' % value)
  if not re.match(r'^(\d+)\.(\d+)$', value):
    raise ConfigError('Expected a compatible version, got %r' % value)
  # Only major version 1 is understood by this parser.
  if value.split('.', 1)[0] != '1':
    raise ConfigError('Expected compatible \'1.x\' version, got %r' % value)

  if algo is None:
    # TODO(maruel): Remove the default around Jan 2014.
    # Default the algorithm used in the .isolated file itself, falls back to
    # 'sha-1' if unspecified.
    algo = SUPPORTED_ALGOS_REVERSE[data.get('algo', 'sha-1')]

  # Validate every top-level key and its value.
  for key, value in data.iteritems():
    if key == 'algo':
      # Must name a supported algorithm and agree with the |algo| in use.
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise ConfigError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != SUPPORTED_ALGOS_REVERSE[algo]:
        raise ConfigError(
            'Expected \'%s\', got %r' % (SUPPORTED_ALGOS_REVERSE[algo], value))

    elif key == 'command':
      # Non-empty list of strings.
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      # Map of relative path -> properties dict. Per-file keys:
      #   'l': symlink target, 'm': file mode, 'h': content digest, 's': size.
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            # 'l' is the symlink target path.
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            # 'm' is the file mode (permission bits).
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            # 'h' is the content digest. NOTE: the error message says 'sha-1'
            # even when another supported algorithm is configured.
            if not is_valid_hash(subsubvalue, algo):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            # 's' is the file size in bytes; may exceed the int range.
            if not isinstance(subsubvalue, (int, long)):
              raise ConfigError('Expected int or long, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        # Cross-key constraints: exactly one of digest ('h') or link ('l');
        # digest entries must carry a size ('s'); links carry neither size
        # nor mode.
        if bool('h' in subvalue) == bool('l' in subvalue):
          raise ConfigError(
              'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
              subvalue)
        if bool('h' in subvalue) != bool('s' in subvalue):
          raise ConfigError(
              'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
              subvalue)
        if bool('s' in subvalue) == bool('l' in subvalue):
          raise ConfigError(
              'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
              subvalue)
        if bool('l' in subvalue) and bool('m' in subvalue):
          raise ConfigError(
              'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      # Non-empty list of digests of other .isolated files.
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_valid_hash(subvalue, algo):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      # Only enforced when the caller requested a specific OS flavor.
      if os_flavor and value != os_flavor:
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (os_flavor, value))

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise ConfigError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are
  # always in the native path format, someone could want to download an
  # .isolated tree from another OS.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].iteritems())
    for v in data['files'].itervalues():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data
1584
1585
class IsolatedFile(object):
  """A single .isolated file in the include tree, parsed as it is fetched."""
  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing
    # a .isolated file in the hash table, is important, as the later ones are
    # not processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Decoded json content of the file.
    self.data = {}
    # One IsolatedFile instance per digest listed in self.data['includes'].
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once every file of this node has been requested.
    self.files_fetched = False

  def load(self, os_flavor, content):
    """Verifies the .isolated file is valid and loads this object with the
    json data.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, os_flavor, self.algo)
    self.children = [
        IsolatedFile(child_hash, self.algo)
        for child_hash in self.data.get('includes', [])
    ]
    self._is_parsed = True

  def fetch_files(self, fetch_queue, files):
    """Adds this node's files to |files| when not already present, requesting
    their content preemptively.

    Note that |files| is modified in place.
    """
    assert self.can_fetch
    if not self._is_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for relpath, props in self.data.get('files', {}).iteritems():
      if relpath in files:
        # The root isolated has priority on the files being mapped; an
        # overridden file must not be fetched.
        continue
      files[relpath] = props
      if 'h' in props:
        # Preemptively request the file content.
        logging.debug('fetching %s' % relpath)
        fetch_queue.add(WorkerPool.MED, props['h'], props['s'])
    self.files_fetched = True
1643
1644
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command to run, as a list of arguments.
    self.command = []
    # Map of relative path -> properties dict, merged across includes.
    self.files = {}
    # Whether the mapped tree should be read-only; resolved to a bool by
    # load().
    self.read_only = None
    # Directory to run the command from, relative to the output directory;
    # resolved to a string by load().
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, fetch_queue, root_isolated_hash, os_flavor, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.
    """
    self.root = IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      # Requests one .isolated file at HIGH priority and tracks it in
      # |pending| until its content arrives.
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      fetch_queue.add(WorkerPool.HIGH, h)

    retrieve(self.root)

    while pending:
      # Block until any of the pending .isolated files is available, then
      # parse it and queue its own includes.
      item_hash = fetch_queue.wait(pending)
      item = pending.pop(item_hash)
      item.load(os_flavor, fetch_queue.cache.read(item_hash))
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(fetch_queue, self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    # Normalize unset values to their defaults.
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False

  def _traverse_tree(self, fetch_queue, node):
    """Fetches files of every fetchable node and unlocks, per node, only the
    first not-yet-fetchable child, enforcing strict left-to-right 'includes'
    order.
    """
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(fetch_queue, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetchable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(fetch_queue, i)

  def _update_self(self, fetch_queue, node):
    """Fetches |node|'s files and merges its properties into this Settings.

    The first node (left-most in the tree) to define 'command', 'read_only' or
    'relative_cwd' wins.
    """
    node.fetch_files(fetch_queue, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      # Ensure paths are correctly separated on windows.
      self.command = node.data['command']
      if self.command:
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1740
1741
def fetch_isolated(
    isolated_hash, storage, cache, algo, outdir, os_flavor, require_command):
  """Aggressively downloads the .isolated file(s), then download all the files.

  Arguments:
    isolated_hash: hash of the root *.isolated file.
    storage: Storage class that communicates with isolate storage.
    cache: LocalCache class that knows how to store and map files locally.
    algo: hash algorithm to use.
    outdir: Output directory to map file tree to.
    os_flavor: OS flavor to choose when reading sections of *.isolated file.
    require_command: Ensure *.isolated specifies a command to run.

  Returns:
    Settings object that holds details about loaded *.isolated file.

  Raises:
    ConfigError: if the .isolated data is invalid or no command is specified
        while |require_command| is True.
    MappingError: if the cache evicted items before they could be mapped.
  """
  with cache:
    fetch_queue = FetchQueue(storage, cache)
    settings = Settings()

    with tools.Profiler('GetIsolateds'):
      # Optionally support local files by manually adding them to cache.
      if not is_valid_hash(isolated_hash, algo):
        isolated_hash = fetch_queue.inject_local_file(isolated_hash, algo)

      # Load all *.isolated and start loading rest of the files.
      settings.load(fetch_queue, isolated_hash, os_flavor, algo)
      if require_command and not settings.command:
        # TODO(vadimsh): All fetch operations are already enqueue and there's no
        # easy way to cancel them.
        raise ConfigError('No command to run')

    with tools.Profiler('GetRest'):
      # Create file system hierarchy.
      if not os.path.isdir(outdir):
        os.makedirs(outdir)
      create_directories(outdir, settings.files)
      create_symlinks(outdir, settings.files.iteritems())

      # Ensure working directory exists.
      cwd = os.path.normpath(os.path.join(outdir, settings.relative_cwd))
      if not os.path.isdir(cwd):
        os.makedirs(cwd)

      # Multimap: digest -> list of pairs (path, props). Several paths may
      # share the same content digest.
      remaining = {}
      for filepath, props in settings.files.iteritems():
        if 'h' in props:
          remaining.setdefault(props['h'], []).append((filepath, props))

      # Now block on the remaining files to be downloaded and mapped.
      logging.info('Retrieving remaining files (%d of them)...',
                   fetch_queue.pending_count)
      last_update = time.time()
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        while remaining:
          detector.ping()

          # Wait for any item to finish fetching to cache.
          digest = fetch_queue.wait(remaining)

          # Link corresponding files to a fetched item in cache.
          for filepath, props in remaining.pop(digest):
            cache.hardlink(
                digest, os.path.join(outdir, filepath), props.get('m'))

          # Report progress.
          duration = time.time() - last_update
          if duration > DELAY_BETWEEN_UPDATES_IN_SECS:
            msg = '%d files remaining...' % len(remaining)
            print msg
            logging.info(msg)
            last_update = time.time()

    # Cache could evict some items we just tried to fetch, it's a fatal error.
    if not fetch_queue.verify_all_cached():
      raise MappingError('Cache is too small to hold all requested files')
  return settings
1820
1821
def directory_to_metadata(root, algo, blacklist):
  """Returns the FileItem list and .isolated metadata for a directory.

  Arguments:
    root: directory to scan.
    algo: hashlib algorithm class used to hash file content.
    blacklist: filter to skip unwanted entries.
  """
  root = file_path.get_native_path_case(root)
  metadata = {}
  for relpath in expand_directory_and_symlink(root, './', blacklist, True):
    meta = process_input(
        os.path.join(root, relpath), {}, False, sys.platform, algo)
    # The timestamp is local-only information; keep it out of the metadata.
    meta.pop('t')
    metadata[relpath] = meta

  items = []
  for relpath, meta in metadata.iteritems():
    # Entries without a digest (e.g. symlinks) have no content to upload.
    if 'h' not in meta:
      continue
    items.append(
        FileItem(
            path=os.path.join(root, relpath),
            digest=meta['h'],
            size=meta['s'],
            is_isolated=relpath.endswith('.isolated')))
  return items, metadata
1842
1843
def archive(storage, algo, files, blacklist):
  """Stores every entry and returns the relevant data.

  Arguments:
    storage: Storage instance used to upload the items.
    algo: hashlib algorithm class used to hash content.
    files: list of unicode paths; each may be a file or a directory. For a
        directory, a .isolated file describing it is generated and uploaded
        along with its content.
    blacklist: filter to skip unwanted entries when walking directories.

  Returns:
    List of (digest, path) tuples, one per input entry.

  Raises:
    Error: on duplicate entries, unknown paths or OS-level failures.
  """
  assert all(isinstance(i, unicode) for i in files), files
  if len(files) != len(set(map(os.path.abspath, files))):
    raise Error('Duplicate entries found.')

  results = []
  # The temporary directory is only created as needed.
  tempdir = None
  try:
    # TODO(maruel): Yield the files to a worker thread.
    items_to_upload = []
    for f in files:
      try:
        filepath = os.path.abspath(f)
        if os.path.isdir(filepath):
          # Uploading a whole directory.
          items, metadata = directory_to_metadata(filepath, algo, blacklist)

          # Create the .isolated file.
          if not tempdir:
            tempdir = tempfile.mkdtemp(prefix='isolateserver')
          handle, isolated = tempfile.mkstemp(dir=tempdir, suffix='.isolated')
          os.close(handle)
          data = {
              'algo': SUPPORTED_ALGOS_REVERSE[algo],
              'files': metadata,
              'version': '1.0',
          }
          save_isolated(isolated, data)
          h = hash_file(isolated, algo)
          items_to_upload.extend(items)
          items_to_upload.append(
              FileItem(
                  path=isolated,
                  digest=h,
                  size=os.stat(isolated).st_size,
                  is_isolated=True))
          results.append((h, f))

        elif os.path.isfile(filepath):
          h = hash_file(filepath, algo)
          items_to_upload.append(
              FileItem(
                  path=filepath,
                  digest=h,
                  size=os.stat(filepath).st_size,
                  is_isolated=f.endswith('.isolated')))
          results.append((h, f))
        else:
          raise Error('%s is neither a file or directory.' % f)
      except OSError as e:
        # Include the underlying OS error so the failure can be diagnosed,
        # instead of silently discarding it.
        raise Error('Failed to process %s: %s' % (f, e))
    # Technically we would care about the uploaded files but we don't much in
    # practice.
    _uploaded_files = storage.upload_items(items_to_upload)
    return results
  finally:
    if tempdir:
      shutil.rmtree(tempdir)
1904
1905
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  parser.add_option(
      '--blacklist',
      action='append', default=list(DEFAULT_BLACKLIST),
      help='List of regexp to use as blacklist filter when uploading '
           'directories')
  options, files = parser.parse_args(args)

  if files == ['-']:
    # readlines() keeps the trailing line separator, which would produce
    # paths that do not exist on disk; strip it and ignore blank lines.
    files = [f.rstrip('\n\r') for f in sys.stdin.readlines()]
    files = [f for f in files if f]

  if not files:
    parser.error('Nothing to upload')

  files = [f.decode('utf-8') for f in files]
  algo = get_hash_algo(options.namespace)
  blacklist = tools.gen_blacklist(options.blacklist)
  try:
    with get_storage(options.isolate_server, options.namespace) as storage:
      results = archive(storage, algo, files, blacklist)
  except Error as e:
    parser.error(e.args[0])
  print('\n'.join('%s %s' % (r[0], r[1]) for r in results))
  return 0
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00001941
1942
def CMDdownload(parser, args):
  """Download data from the server.

  It can either download individual files or a complete tree from a .isolated
  file.
  """
  parser.add_option(
      '-i', '--isolated', metavar='HASH',
      help='hash of an isolated file, .isolated file content is discarded, use '
           '--file if you need it')
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default=os.getcwd(),
      help='destination directory')
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported arguments: %s' % args)
  # The two modes are mutually exclusive and exactly one is required.
  if bool(options.isolated) == bool(options.file):
    parser.error('Use one of --isolated or --file, and only one.')

  options.target = os.path.abspath(options.target)
  storage = get_storage(options.isolate_server, options.namespace)
  cache = MemoryCache()
  algo = get_hash_algo(options.namespace)

  # Fetching individual files.
  if options.file:
    channel = threading_utils.TaskChannel()
    pending = {}
    for digest, dest in options.file:
      pending[digest] = dest
      storage.async_fetch(
          channel,
          WorkerPool.MED,
          digest,
          UNKNOWN_FILE_SIZE,
          functools.partial(file_write, os.path.join(options.target, dest)))
    while pending:
      fetched = channel.pull()
      dest = pending.pop(fetched)
      logging.info('%s: %s', fetched, dest)

  # Fetching whole isolated tree.
  if options.isolated:
    settings = fetch_isolated(
        isolated_hash=options.isolated,
        storage=storage,
        cache=cache,
        algo=algo,
        outdir=options.target,
        os_flavor=None,
        require_command=False)
    # |options.target| was made absolute above, so |rel| is already the
    # absolute working directory; do not join it with the target a second
    # time.
    rel = os.path.join(options.target, settings.relative_cwd)
    print('To run this test please run from the directory %s:' % rel)
    print(' ' + ' '.join(settings.command))

  return 0
2003
2004
class OptionParserIsolateServer(tools.OptionParserWithLogging):
  """Adds the --isolate-server and --namespace options to the base parser."""

  def __init__(self, **kwargs):
    prog = os.path.basename(sys.modules[__name__].__file__)
    tools.OptionParserWithLogging.__init__(
        self, version=__version__, prog=prog, **kwargs)
    self.add_option(
        '-I', '--isolate-server',
        metavar='URL', default='',
        help='Isolate server to use')
    self.add_option(
        '--namespace', default='default-gzip',
        help='The namespace to use on the server, default: %default')

  def parse_args(self, *args, **kwargs):
    """Parses the command line and enforces that --isolate-server is set."""
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    # Strip any trailing '/' so URL paths can be appended consistently.
    options.isolate_server = options.isolate_server.rstrip('/')
    if not options.isolate_server:
      self.error('--isolate-server is required.')
    return options, args
2027
2028
def main(args):
  """Dispatches execution to the requested subcommand.

  Returns the subcommand's exit code, or 1 on unhandled failure.
  """
  dispatcher = subcommand.CommandDispatcher(__name__)
  try:
    return dispatcher.execute(OptionParserIsolateServer(), args)
  except Exception as exc:
    # Report any unexpected failure and map it to a non-zero exit code
    # instead of dumping a raw traceback.
    tools.report_error(exc)
    return 1
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00002036
2037
if __name__ == '__main__':
  # Normalize the terminal environment before dispatching: fix the
  # stdout/stderr encoding, disable output buffering so progress messages
  # appear immediately, and initialize colorama for colored output.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))