blob: f41a42c5d1811b699c8b60201ab2d69bd15b8a82 [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00005
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05006"""Archives a set of files or directories to a server."""
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00007
Marc-Antoine Ruel582e2242014-06-26 15:22:06 -04008__version__ = '0.3.3'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +000010import functools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000011import hashlib
maruel@chromium.org41601642013-09-18 19:40:46 +000012import json
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000013import logging
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000014import os
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000015import re
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050016import shutil
17import stat
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000018import sys
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050019import tempfile
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000020import threading
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000021import time
maruel@chromium.orge82112e2013-04-24 14:41:55 +000022import urllib
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -050023import urlparse
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000024import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000025
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000026from third_party import colorama
27from third_party.depot_tools import fix_encoding
28from third_party.depot_tools import subcommand
29
Marc-Antoine Ruel37989932013-11-19 16:28:08 -050030from utils import file_path
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000031from utils import net
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000032from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000033from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000034
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080035import auth
36
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000037
# Version of isolate protocol passed to the server in /handshake request.
ISOLATE_PROTOCOL_VERSION = '1.0'
# Version stored and expected in .isolated files.
ISOLATED_FILE_VERSION = '1.4'


# The number of files to check the isolate server per /pre-upload query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: larger the file -> larger the
# possibility it has changed). Then the first ITEMS_PER_CONTAINS_QUERIES[0]
# files are taken and sent to '/pre-upload', then the next
# ITEMS_PER_CONTAINS_QUERIES[1], and so on. The numbers here are a trade-off;
# the more per request, the lower the effect of HTTP round trip latency and
# TCP-level chattiness. On the other hand, larger values cause longer lookups,
# increasing the initial latency to start uploading, which is especially an
# issue for large files. This value is optimized for the "few thousands files
# to look up with minimal number of large files missing" case.
ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]


# A list of already compressed extension types that should not receive any
# compression before being uploaded.
ALREADY_COMPRESSED_TYPES = [
    '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'pdf', 'png',
    'wav', 'zip'
]


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# Chunk size to use when doing disk I/O.
DISK_FILE_CHUNK = 1024 * 1024

# Chunk size to use when reading from network stream.
NET_IO_FILE_CHUNK = 16 * 1024


# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout whole download will be aborted.
DOWNLOAD_READ_TIMEOUT = 60

# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30


# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
# specify the names here. Maps the wire-format algorithm name to the hashlib
# constructor.
SUPPORTED_ALGOS = {
  'md5': hashlib.md5,
  'sha-1': hashlib.sha1,
  'sha-512': hashlib.sha512,
}


# Used for serialization: maps the hashlib constructor back to its wire name.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.iteritems())


# Regexps of files never worth archiving.
DEFAULT_BLACKLIST = (
  # Temporary vim or python files.
  r'^.+\.(?:pyc|swp)$',
  # .git or .svn directory.
  r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
)


# Chromium-specific.
DEFAULT_BLACKLIST += (
  r'^.+\.(?:run_test_cases)$',
  r'^(?:.+' + re.escape(os.path.sep) + r'|)testserver\.log$',
)
121
class Error(Exception):
  """Base class for generic runtime errors raised by this module."""
125
126
class ConfigError(ValueError):
  """Raised when a .isolated file fails to load or is malformed."""
130
131
class MappingError(OSError):
  """Raised when the file tree cannot be recreated on disk."""
135
136
def is_valid_hash(value, algo):
  """Checks whether |value| looks like a hex digest produced by |algo|.

  A valid digest is exactly digest_size*2 hexadecimal characters (upper or
  lower case).
  """
  expected_len = algo().digest_size * 2
  pattern = r'^[a-fA-F0-9]{%d}$' % expected_len
  return bool(re.match(pattern, value))
141
142
def hash_file(filepath, algo):
  """Calculates the hash of a file without reading it all in memory at once.

  |algo| should be one of hashlib hashing algorithm.
  """
  hasher = algo()
  with open(filepath, 'rb') as f:
    chunk = f.read(DISK_FILE_CHUNK)
    while chunk:
      hasher.update(chunk)
      chunk = f.read(DISK_FILE_CHUNK)
  return hasher.hexdigest()
156
157
def stream_read(stream, chunk_size):
  """Yields successive |chunk_size| reads from |stream| until exhausted."""
  data = stream.read(chunk_size)
  while data:
    yield data
    data = stream.read(chunk_size)
165
166
def file_read(filepath, chunk_size=DISK_FILE_CHUNK, offset=0):
  """Yields file content in chunks of |chunk_size| starting from |offset|."""
  with open(filepath, 'rb') as f:
    if offset:
      f.seek(offset)
    data = f.read(chunk_size)
    while data:
      yield data
      data = f.read(chunk_size)
177
178
def file_write(filepath, content_generator):
  """Writes the chunks produced by |content_generator| to |filepath|.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  parent = os.path.dirname(filepath)
  if not os.path.isdir(parent):
    os.makedirs(parent)
  written = 0
  with open(filepath, 'wb') as out:
    for chunk in content_generator:
      written += len(chunk)
      out.write(chunk)
  return written
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000197
198
def zip_compress(content_generator, level=7):
  """Reads chunks from |content_generator| and yields zip compressed chunks."""
  zipper = zlib.compressobj(level)
  for piece in content_generator:
    out = zipper.compress(piece)
    if out:
      yield out
  # Flush whatever the compressor still buffers internally.
  remainder = zipper.flush(zlib.Z_FINISH)
  if remainder:
    yield remainder
209
210
def zip_decompress(content_generator, chunk_size=DISK_FILE_CHUNK):
  """Reads zipped data from |content_generator| and yields decompressed data.

  Decompresses data in small chunks (no larger than |chunk_size|) so that
  zip bomb file doesn't cause zlib to preallocate huge amount of memory.

  Raises IOError if data is corrupted or incomplete.
  """
  decompressor = zlib.decompressobj()
  compressed_size = 0
  try:
    for chunk in content_generator:
      compressed_size += len(chunk)
      # Limit the output size per call; any input zlib could not consume yet
      # is kept in decompressor.unconsumed_tail.
      data = decompressor.decompress(chunk, chunk_size)
      if data:
        yield data
      # Drain the buffered input, still no more than chunk_size at a time,
      # before reading the next chunk from the generator.
      while decompressor.unconsumed_tail:
        data = decompressor.decompress(decompressor.unconsumed_tail, chunk_size)
        if data:
          yield data
    # Emit any final bytes held inside the decompressor.
    tail = decompressor.flush()
    if tail:
      yield tail
  except zlib.error as e:
    raise IOError(
        'Corrupted zip stream (read %d bytes) - %s' % (compressed_size, e))
  # Ensure all data was read and decompressed.
  if decompressor.unused_data or decompressor.unconsumed_tail:
    raise IOError('Not all data was decompressed')
240
241
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (store, no compression) for file types that are already
  compressed, 7 otherwise.
  """
  # os.path.splitext() returns the extension WITH the leading dot ('.zip'),
  # while ALREADY_COMPRESSED_TYPES lists bare extensions ('zip'). Strip the
  # dot so the membership test can actually match; otherwise already
  # compressed files would be pointlessly recompressed at level 7.
  file_ext = os.path.splitext(filename)[1].lower().lstrip('.')
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
247
248
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files."""
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect every ancestor directory (relative paths) that must exist.
  to_create = set()
  for f in files:
    path = os.path.dirname(f)
    while path and path not in to_create:
      to_create.add(path)
      path = os.path.dirname(path)
  # Lexicographic order guarantees a parent is created before its children.
  for rel_dir in sorted(to_create):
    os.mkdir(os.path.join(base_directory, rel_dir))
261
262
def create_symlinks(base_directory, files):
  """Creates any symlinks needed by the given set of files."""
  for relpath, properties in files:
    if 'l' not in properties:
      # Not a symlink entry.
      continue
    if sys.platform == 'win32':
      # TODO(maruel): Create symlink via the win32 api.
      logging.warning('Ignoring symlink %s', relpath)
      continue
    destination = os.path.join(base_directory, relpath)
    # os.symlink() doesn't exist on Windows.
    os.symlink(properties['l'], destination)  # pylint: disable=E1101
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000275
276
def is_valid_file(filepath, size):
  """Determines if the given files appears valid.

  Currently it just checks the file's size.
  """
  if size == UNKNOWN_FILE_SIZE:
    # No expected size available; existence is the only possible check.
    return os.path.isfile(filepath)
  on_disk = os.stat(filepath).st_size
  if size == on_disk:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), on_disk, size)
  return False
291
292
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """Thread pool that automatically retries on IOError and runs a preconfigured
  function.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # How many times a task is retried when it raises IOError.
  RETRIES = 5

  def __init__(self):
    # Positional args: exceptions to retry on, retry count, initial workers,
    # max workers, then 0 and 'remote' — presumably the queue size and the
    # thread name prefix; TODO(review): confirm against
    # threading_utils.AutoRetryThreadPool's signature.
    super(WorkerPool, self).__init__(
        [IOError],
        self.RETRIES,
        self.INITIAL_WORKERS,
        self.MAX_WORKERS,
        0,
        'remote')
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000310
311
class Item(object):
  """A single entry that can be uploaded to Storage.

  Digest and size can be passed to the constructor when already known;
  otherwise prepare() derives them from content(). A caller-supplied digest
  MUST have been produced with the hash algorithm the Storage uses.

  When used with Storage, an Item starts its life in a main thread, travels
  to 'contains' thread, then to 'push' thread and then finally back to
  the main thread. It is never used concurrently from multiple threads.
  """

  def __init__(self, digest=None, size=None, high_priority=False):
    self.digest = digest
    self.size = size
    self.high_priority = high_priority
    # Default zlib level; subclasses may override per item.
    self.compression_level = 6

  def content(self):
    """Iterable with content of this item as byte string (str) chunks."""
    raise NotImplementedError()

  def prepare(self, hash_algo):
    """Ensures self.digest and self.size are set.

    Uses content() as a source of data to calculate them. Does nothing if
    both are already known.

    Arguments:
      hash_algo: hash algorithm to use to calculate digest.
    """
    if self.digest is not None and self.size is not None:
      return
    hasher = hash_algo()
    byte_count = 0
    for chunk in self.content():
      hasher.update(chunk)
      byte_count += len(chunk)
    self.digest = hasher.hexdigest()
    self.size = byte_count
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000351
352
class FileItem(Item):
  """A file on disk to push to Storage.

  Digest and size can be passed to the constructor when already known;
  otherwise they are derived from the file content (size eagerly via
  os.stat, digest lazily in prepare()).
  """

  def __init__(self, path, digest=None, size=None, high_priority=False):
    if size is None:
      size = os.stat(path).st_size
    super(FileItem, self).__init__(digest, size, high_priority)
    self.path = path
    self.compression_level = get_zip_compression_level(path)

  def content(self):
    return file_read(self.path)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000370
371
class BufferItem(Item):
  """An in-memory byte buffer to push to Storage.

  The size is known upfront (the buffer length); the digest is computed
  lazily by prepare().
  """

  def __init__(self, buf, high_priority=False):
    super(BufferItem, self).__init__(
        digest=None, size=len(buf), high_priority=high_priority)
    self.buffer = buf

  def content(self):
    # The whole buffer as a single chunk.
    return [self.buffer]
381
382
class Storage(object):
  """Efficiently downloads or uploads large set of files via StorageApi.

  Implements compression support, parallel 'contains' checks, parallel uploads
  and more.

  Works only within single namespace (and thus hashing algorithm and compression
  scheme are fixed).

  Spawns multiple internal threads. Thread safe, but not fork safe.
  """

  def __init__(self, storage_api):
    self._storage_api = storage_api
    # Whether data is gzipped before upload / after download; decided from
    # the namespace name.
    self._use_zip = is_namespace_with_compression(storage_api.namespace)
    self._hash_algo = get_hash_algo(storage_api.namespace)
    # Thread pools are created lazily on first use (see properties below).
    self._cpu_thread_pool = None
    self._net_thread_pool = None

  @property
  def hash_algo(self):
    """Hashing algorithm used to name files in storage based on their content.

    Defined by |namespace|. See also 'get_hash_algo'.
    """
    return self._hash_algo

  @property
  def location(self):
    """Location of a backing store that this class is using.

    Exact meaning depends on the storage_api type. For IsolateServer it is
    an URL of isolate server, for FileSystem is it a path in file system.
    """
    return self._storage_api.location

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    return self._storage_api.namespace

  @property
  def cpu_thread_pool(self):
    """ThreadPool for CPU-bound tasks like zipping."""
    if self._cpu_thread_pool is None:
      self._cpu_thread_pool = threading_utils.ThreadPool(
          2, max(threading_utils.num_processors(), 2), 0, 'zip')
    return self._cpu_thread_pool

  @property
  def net_thread_pool(self):
    """AutoRetryThreadPool for IO-bound tasks, retries IOError."""
    if self._net_thread_pool is None:
      self._net_thread_pool = WorkerPool()
    return self._net_thread_pool

  def close(self):
    """Waits for all pending tasks to finish."""
    if self._cpu_thread_pool:
      self._cpu_thread_pool.join()
      self._cpu_thread_pool.close()
      self._cpu_thread_pool = None
    if self._net_thread_pool:
      self._net_thread_pool.join()
      self._net_thread_pool.close()
      self._net_thread_pool = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exc_value, _traceback):
    """Context manager interface."""
    self.close()
    return False

  def upload_items(self, items):
    """Uploads a bunch of items to the isolate server.

    It figures out what items are missing from the server and uploads only them.

    Arguments:
      items: list of Item instances that represents data to upload.

    Returns:
      List of items that were uploaded. All other items are already there.
    """
    # TODO(vadimsh): Optimize special case of len(items) == 1 that is frequently
    # used by swarming.py. There's no need to spawn multiple threads and try to
    # do stuff in parallel: there's nothing to parallelize. 'contains' check and
    # 'push' should be performed sequentially in the context of current thread.

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # For each digest keep only first Item that matches it. All other items
    # are just indistinguishable copies from the point of view of isolate
    # server (it doesn't care about paths at all, only content and digests).
    seen = {}
    duplicates = 0
    for item in items:
      if seen.setdefault(item.digest, item) is not item:
        duplicates += 1
    items = seen.values()
    if duplicates:
      logging.info('Skipped %d duplicated files', duplicates)

    # Enqueue all upload tasks.
    missing = set()
    uploaded = []
    channel = threading_utils.TaskChannel()
    for missing_item, push_state in self.get_missing_items(items):
      missing.add(missing_item)
      self.async_push(channel, missing_item, push_state)

    # No need to spawn deadlock detector thread if there's nothing to upload.
    if missing:
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        # Wait for all started uploads to finish.
        while len(uploaded) != len(missing):
          detector.ping()
          item = channel.pull()
          uploaded.append(item)
          logging.debug(
              'Uploaded %d / %d: %s', len(uploaded), len(missing), item.digest)
      logging.info('All files are uploaded')

    # Print stats.
    total = len(items)
    total_size = sum(f.size for f in items)
    logging.info(
        'Total: %6d, %9.1fkb',
        total,
        total_size / 1024.)
    cache_hit = set(items) - missing
    cache_hit_size = sum(f.size for f in cache_hit)
    logging.info(
        'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_hit),
        cache_hit_size / 1024.,
        len(cache_hit) * 100. / total,
        cache_hit_size * 100. / total_size if total_size else 0)
    cache_miss = missing
    cache_miss_size = sum(f.size for f in cache_miss)
    logging.info(
        'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_miss),
        cache_miss_size / 1024.,
        len(cache_miss) * 100. / total,
        cache_miss_size * 100. / total_size if total_size else 0)

    return uploaded

  def get_fetch_url(self, item):
    """Returns an URL that can be used to fetch given item once it's uploaded.

    Note that if namespace uses compression, data at given URL is compressed.

    Arguments:
      item: Item to get fetch URL for.

    Returns:
      An URL or None if underlying protocol doesn't support this.
    """
    item.prepare(self._hash_algo)
    return self._storage_api.get_fetch_url(item.digest)

  def async_push(self, channel, item, push_state):
    """Starts asynchronous push to the server in a parallel thread.

    Can be used only after |item| was checked for presence on a server with
    'get_missing_items' call. 'get_missing_items' returns |push_state| object
    that contains storage specific information describing how to upload
    the item (for example in case of cloud storage, it is signed upload URLs).

    Arguments:
      channel: TaskChannel that receives back |item| when upload ends.
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      None, but |channel| later receives back |item| when upload ends.
    """
    # Thread pool task priority.
    priority = WorkerPool.HIGH if item.high_priority else WorkerPool.MED

    def push(content):
      """Pushes an Item and returns it to |channel|."""
      item.prepare(self._hash_algo)
      self._storage_api.push(item, push_state, content)
      return item

    # If zipping is not required, just start a push task.
    if not self._use_zip:
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, item.content())
      return

    # If zipping is enabled, zip in a separate thread.
    def zip_and_push():
      # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
      # content right here. It will block until all file is zipped.
      try:
        stream = zip_compress(item.content(), item.compression_level)
        data = ''.join(stream)
      except Exception as exc:
        logging.error('Failed to zip \'%s\': %s', item, exc)
        channel.send_exception()
        return
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, [data])
    self.cpu_thread_pool.add_task(priority, zip_and_push)

  def push(self, item, push_state):
    """Synchronously pushes a single item to the server.

    If you need to push many items at once, consider using 'upload_items' or
    'async_push' with instance of TaskChannel.

    Arguments:
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      Pushed item (same object as |item|).
    """
    channel = threading_utils.TaskChannel()
    with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT):
      self.async_push(channel, item, push_state)
      pushed = channel.pull()
      assert pushed is item
    return item

  def async_fetch(self, channel, priority, digest, size, sink):
    """Starts asynchronous fetch from the server in a parallel thread.

    Arguments:
      channel: TaskChannel that receives back |digest| when download ends.
      priority: thread pool task priority for the fetch.
      digest: hex digest of an item to download.
      size: expected size of the item (after decompression).
      sink: function that will be called as sink(generator).
    """
    def fetch():
      try:
        # Prepare reading pipeline.
        stream = self._storage_api.fetch(digest)
        if self._use_zip:
          stream = zip_decompress(stream, DISK_FILE_CHUNK)
        # Run |stream| through verifier that will assert its size.
        verifier = FetchStreamVerifier(stream, size)
        # Verified stream goes to |sink|.
        sink(verifier.run())
      except Exception as err:
        logging.error('Failed to fetch %s: %s', digest, err)
        raise
      return digest

    # Don't bother with zip_thread_pool for decompression. Decompression is
    # really fast and most probably IO bound anyway.
    self.net_thread_pool.add_task_with_channel(channel, priority, fetch)

  def get_missing_items(self, items):
    """Yields items that are missing from the server.

    Issues multiple parallel queries via StorageApi's 'contains' method.

    Arguments:
      items: a list of Item objects to check.

    Yields:
      For each missing item it yields a pair (item, push_state), where:
        * item - Item object that is missing (one of |items|).
        * push_state - opaque object that contains storage specific information
            describing how to upload the item (for example in case of cloud
            storage, it is signed upload URLs). It can later be passed to
            'async_push'.
    """
    channel = threading_utils.TaskChannel()
    pending = 0

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # Enqueue all requests.
    for batch in batch_items_for_check(items):
      self.net_thread_pool.add_task_with_channel(channel, WorkerPool.HIGH,
          self._storage_api.contains, batch)
      pending += 1

    # Yield results as they come in.
    for _ in xrange(pending):
      for missing_item, push_state in channel.pull().iteritems():
        yield missing_item, push_state
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000682
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000683
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800684def batch_items_for_check(items):
685 """Splits list of items to check for existence on the server into batches.
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000686
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800687 Each batch corresponds to a single 'exists?' query to the server via a call
688 to StorageApi's 'contains' method.
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000689
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800690 Arguments:
691 items: a list of Item objects.
692
693 Yields:
694 Batches of items to query for existence in a single operation,
695 each batch is a list of Item objects.
696 """
697 batch_count = 0
698 batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[0]
699 next_queries = []
700 for item in sorted(items, key=lambda x: x.size, reverse=True):
701 next_queries.append(item)
702 if len(next_queries) == batch_size_limit:
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000703 yield next_queries
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800704 next_queries = []
705 batch_count += 1
706 batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[
707 min(batch_count, len(ITEMS_PER_CONTAINS_QUERIES) - 1)]
708 if next_queries:
709 yield next_queries
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000710
711
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +0000712class FetchQueue(object):
713 """Fetches items from Storage and places them into LocalCache.
714
715 It manages multiple concurrent fetch operations. Acts as a bridge between
716 Storage and LocalCache so that Storage and LocalCache don't depend on each
717 other at all.
718 """
719
720 def __init__(self, storage, cache):
721 self.storage = storage
722 self.cache = cache
723 self._channel = threading_utils.TaskChannel()
724 self._pending = set()
725 self._accessed = set()
726 self._fetched = cache.cached_set()
727
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800728 def add(self, digest, size=UNKNOWN_FILE_SIZE, priority=WorkerPool.MED):
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +0000729 """Starts asynchronous fetch of item |digest|."""
730 # Fetching it now?
731 if digest in self._pending:
732 return
733
734 # Mark this file as in use, verify_all_cached will later ensure it is still
735 # in cache.
736 self._accessed.add(digest)
737
738 # Already fetched? Notify cache to update item's LRU position.
739 if digest in self._fetched:
740 # 'touch' returns True if item is in cache and not corrupted.
741 if self.cache.touch(digest, size):
742 return
743 # Item is corrupted, remove it from cache and fetch it again.
744 self._fetched.remove(digest)
745 self.cache.evict(digest)
746
747 # TODO(maruel): It should look at the free disk space, the current cache
748 # size and the size of the new item on every new item:
749 # - Trim the cache as more entries are listed when free disk space is low,
750 # otherwise if the amount of data downloaded during the run > free disk
751 # space, it'll crash.
752 # - Make sure there's enough free disk space to fit all dependencies of
753 # this run! If not, abort early.
754
755 # Start fetching.
756 self._pending.add(digest)
757 self.storage.async_fetch(
758 self._channel, priority, digest, size,
759 functools.partial(self.cache.write, digest))
760
761 def wait(self, digests):
762 """Starts a loop that waits for at least one of |digests| to be retrieved.
763
764 Returns the first digest retrieved.
765 """
766 # Flush any already fetched items.
767 for digest in digests:
768 if digest in self._fetched:
769 return digest
770
771 # Ensure all requested items are being fetched now.
772 assert all(digest in self._pending for digest in digests), (
773 digests, self._pending)
774
775 # Wait for some requested item to finish fetching.
776 while self._pending:
777 digest = self._channel.pull()
778 self._pending.remove(digest)
779 self._fetched.add(digest)
780 if digest in digests:
781 return digest
782
783 # Should never reach this point due to assert above.
784 raise RuntimeError('Impossible state')
785
786 def inject_local_file(self, path, algo):
787 """Adds local file to the cache as if it was fetched from storage."""
788 with open(path, 'rb') as f:
789 data = f.read()
790 digest = algo(data).hexdigest()
791 self.cache.write(digest, [data])
792 self._fetched.add(digest)
793 return digest
794
795 @property
796 def pending_count(self):
797 """Returns number of items to be fetched."""
798 return len(self._pending)
799
800 def verify_all_cached(self):
801 """True if all accessed items are in cache."""
802 return self._accessed.issubset(self.cache.cached_set())
803
804
class FetchStreamVerifier(object):
  """Verifies that fetched file is valid before passing it to the LocalCache."""

  def __init__(self, stream, expected_size):
    # stream: iterable yielding chunks (str) of the downloaded item.
    # expected_size: size in bytes, or UNKNOWN_FILE_SIZE to skip the check.
    self.stream = stream
    self.expected_size = expected_size
    self.current_size = 0

  def run(self):
    """Generator that yields same items as |stream|.

    Verifies |stream| is complete before yielding a last chunk to consumer.

    Also wraps IOError produced by consumer into MappingError exceptions since
    otherwise Storage will retry fetch on unrelated local cache errors.
    """
    # Read one chunk ahead, keep it in |stored|.
    # That way a complete stream can be verified before pushing last chunk
    # to consumer.
    stored = None
    for chunk in self.stream:
      assert chunk is not None
      if stored is not None:
        self._inspect_chunk(stored, is_last=False)
        try:
          yield stored
        except IOError as exc:
          raise MappingError('Failed to store an item in cache: %s' % exc)
      stored = chunk
    if stored is not None:
      self._inspect_chunk(stored, is_last=True)
      try:
        yield stored
      except IOError as exc:
        raise MappingError('Failed to store an item in cache: %s' % exc)
    elif self.expected_size not in (0, UNKNOWN_FILE_SIZE):
      # The stream yielded no chunks at all. Previously this skipped size
      # verification entirely, silently accepting a truncated (empty) fetch
      # of a non-empty file. Treat it as an incomplete download instead.
      raise IOError('Incorrect file size: expected %d, got 0' %
          self.expected_size)

  def _inspect_chunk(self, chunk, is_last):
    """Called for each fetched chunk before passing it to consumer."""
    self.current_size += len(chunk)
    if (is_last and (self.expected_size != UNKNOWN_FILE_SIZE) and
        (self.expected_size != self.current_size)):
      raise IOError('Incorrect file size: expected %d, got %d' % (
          self.expected_size, self.current_size))
848
849
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000850class StorageApi(object):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800851 """Interface for classes that implement low-level storage operations.
852
853 StorageApi is oblivious of compression and hashing scheme used. This details
854 are handled in higher level Storage class.
855
856 Clients should generally not use StorageApi directly. Storage class is
857 preferred since it implements compression and upload optimizations.
858 """
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000859
Vadim Shtayurae0ab1902014-04-29 10:55:27 -0700860 @property
861 def location(self):
862 """Location of a backing store that this class is using.
863
864 Exact meaning depends on the type. For IsolateServer it is an URL of isolate
865 server, for FileSystem is it a path in file system.
866 """
867 raise NotImplementedError()
868
869 @property
870 def namespace(self):
871 """Isolate namespace used by this storage.
872
873 Indirectly defines hashing scheme and compression method used.
874 """
875 raise NotImplementedError()
876
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +0000877 def get_fetch_url(self, digest):
878 """Returns an URL that can be used to fetch an item with given digest.
879
880 Arguments:
881 digest: hex digest of item to fetch.
882
883 Returns:
884 An URL or None if the protocol doesn't support this.
885 """
886 raise NotImplementedError()
887
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -0800888 def fetch(self, digest, offset=0):
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000889 """Fetches an object and yields its content.
890
891 Arguments:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000892 digest: hash digest of item to download.
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -0800893 offset: offset (in bytes) from the start of the file to resume fetch from.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000894
895 Yields:
896 Chunks of downloaded item (as str objects).
897 """
898 raise NotImplementedError()
899
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800900 def push(self, item, push_state, content=None):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000901 """Uploads an |item| with content generated by |content| generator.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000902
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800903 |item| MUST go through 'contains' call to get |push_state| before it can
904 be pushed to the storage.
905
906 To be clear, here is one possible usage:
907 all_items = [... all items to push as Item subclasses ...]
908 for missing_item, push_state in storage_api.contains(all_items).items():
909 storage_api.push(missing_item, push_state)
910
911 When pushing to a namespace with compression, data that should be pushed
912 and data provided by the item is not the same. In that case |content| is
913 not None and it yields chunks of compressed data (using item.content() as
914 a source of original uncompressed data). This is implemented by Storage
915 class.
916
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000917 Arguments:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000918 item: Item object that holds information about an item being pushed.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800919 push_state: push state object as returned by 'contains' call.
920 content: a generator that yields chunks to push, item.content() if None.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000921
922 Returns:
923 None.
924 """
925 raise NotImplementedError()
926
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000927 def contains(self, items):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800928 """Checks for |items| on the server, prepares missing ones for upload.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000929
930 Arguments:
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800931 items: list of Item objects to check for presence.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000932
933 Returns:
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800934 A dict missing Item -> opaque push state object to be passed to 'push'.
935 See doc string for 'push'.
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +0000936 """
937 raise NotImplementedError()
938
939
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800940class _IsolateServerPushState(object):
941 """Per-item state passed from IsolateServer.contains to IsolateServer.push.
Mike Frysinger27f03da2014-02-12 16:47:01 -0500942
943 Note this needs to be a global class to support pickling.
944 """
945
946 def __init__(self, upload_url, finalize_url):
947 self.upload_url = upload_url
948 self.finalize_url = finalize_url
949 self.uploaded = False
950 self.finalized = False
951
952
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000953class IsolateServer(StorageApi):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000954 """StorageApi implementation that downloads and uploads to Isolate Server.
955
956 It uploads and downloads directly from Google Storage whenever appropriate.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800957 Works only within single namespace.
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000958 """
959
maruel@chromium.org3e42ce82013-09-12 18:36:59 +0000960 def __init__(self, base_url, namespace):
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000961 super(IsolateServer, self).__init__()
maruel@chromium.org3e42ce82013-09-12 18:36:59 +0000962 assert base_url.startswith('http'), base_url
Vadim Shtayurae0ab1902014-04-29 10:55:27 -0700963 self._base_url = base_url.rstrip('/')
964 self._namespace = namespace
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000965 self._lock = threading.Lock()
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000966 self._server_caps = None
967
968 @staticmethod
969 def _generate_handshake_request():
970 """Returns a dict to be sent as handshake request body."""
971 # TODO(vadimsh): Set 'pusher' and 'fetcher' according to intended usage.
972 return {
973 'client_app_version': __version__,
974 'fetcher': True,
975 'protocol_version': ISOLATE_PROTOCOL_VERSION,
976 'pusher': True,
977 }
978
979 @staticmethod
980 def _validate_handshake_response(caps):
981 """Validates and normalizes handshake response."""
982 logging.info('Protocol version: %s', caps['protocol_version'])
983 logging.info('Server version: %s', caps['server_app_version'])
984 if caps.get('error'):
985 raise MappingError(caps['error'])
986 if not caps['access_token']:
987 raise ValueError('access_token is missing')
988 return caps
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000989
990 @property
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000991 def _server_capabilities(self):
992 """Performs handshake with the server if not yet done.
993
994 Returns:
995 Server capabilities dictionary as returned by /handshake endpoint.
996
997 Raises:
998 MappingError if server rejects the handshake.
999 """
maruel@chromium.org3e42ce82013-09-12 18:36:59 +00001000 # TODO(maruel): Make this request much earlier asynchronously while the
1001 # files are being enumerated.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001002
1003 # TODO(vadimsh): Put |namespace| in the URL so that server can apply
1004 # namespace-level ACLs to this call.
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001005 with self._lock:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001006 if self._server_caps is None:
1007 request_body = json.dumps(
1008 self._generate_handshake_request(), separators=(',', ':'))
1009 response = net.url_read(
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001010 url=self._base_url + '/content-gs/handshake',
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001011 data=request_body,
1012 content_type='application/json',
1013 method='POST')
1014 if response is None:
1015 raise MappingError('Failed to perform handshake.')
1016 try:
1017 caps = json.loads(response)
1018 if not isinstance(caps, dict):
1019 raise ValueError('Expecting JSON dict')
1020 self._server_caps = self._validate_handshake_response(caps)
1021 except (ValueError, KeyError, TypeError) as exc:
1022 # KeyError exception has very confusing str conversion: it's just a
1023 # missing key value and nothing else. So print exception class name
1024 # as well.
1025 raise MappingError('Invalid handshake response (%s): %s' % (
1026 exc.__class__.__name__, exc))
1027 return self._server_caps
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001028
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001029 @property
1030 def location(self):
1031 return self._base_url
1032
1033 @property
1034 def namespace(self):
1035 return self._namespace
1036
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +00001037 def get_fetch_url(self, digest):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001038 assert isinstance(digest, basestring)
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +00001039 return '%s/content-gs/retrieve/%s/%s' % (
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001040 self._base_url, self._namespace, digest)
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +00001041
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001042 def fetch(self, digest, offset=0):
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +00001043 source_url = self.get_fetch_url(digest)
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001044 logging.debug('download_file(%s, %d)', source_url, offset)
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001045
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001046 connection = net.url_open(
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001047 source_url,
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001048 read_timeout=DOWNLOAD_READ_TIMEOUT,
1049 headers={'Range': 'bytes=%d-' % offset} if offset else None)
1050
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001051 if not connection:
Vadim Shtayurae34e13a2014-02-02 11:23:26 -08001052 raise IOError('Request failed - %s' % source_url)
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001053
1054 # If |offset| is used, verify server respects it by checking Content-Range.
1055 if offset:
1056 content_range = connection.get_header('Content-Range')
1057 if not content_range:
1058 raise IOError('Missing Content-Range header')
1059
1060 # 'Content-Range' format is 'bytes <offset>-<last_byte_index>/<size>'.
1061 # According to a spec, <size> can be '*' meaning "Total size of the file
1062 # is not known in advance".
1063 try:
1064 match = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', content_range)
1065 if not match:
1066 raise ValueError()
1067 content_offset = int(match.group(1))
1068 last_byte_index = int(match.group(2))
1069 size = None if match.group(3) == '*' else int(match.group(3))
1070 except ValueError:
1071 raise IOError('Invalid Content-Range header: %s' % content_range)
1072
1073 # Ensure returned offset equals requested one.
1074 if offset != content_offset:
1075 raise IOError('Expecting offset %d, got %d (Content-Range is %s)' % (
1076 offset, content_offset, content_range))
1077
1078 # Ensure entire tail of the file is returned.
1079 if size is not None and last_byte_index + 1 != size:
1080 raise IOError('Incomplete response. Content-Range: %s' % content_range)
1081
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001082 return stream_read(connection, NET_IO_FILE_CHUNK)
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001083
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001084 def push(self, item, push_state, content=None):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001085 assert isinstance(item, Item)
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001086 assert item.digest is not None
1087 assert item.size is not None
1088 assert isinstance(push_state, _IsolateServerPushState)
1089 assert not push_state.finalized
1090
1091 # Default to item.content().
1092 content = item.content() if content is None else content
1093
1094 # Do not iterate byte by byte over 'str'. Push it all as a single chunk.
1095 if isinstance(content, basestring):
1096 assert not isinstance(content, unicode), 'Unicode string is not allowed'
1097 content = [content]
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001098
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001099 # TODO(vadimsh): Do not read from |content| generator when retrying push.
1100 # If |content| is indeed a generator, it can not be re-winded back
1101 # to the beginning of the stream. A retry will find it exhausted. A possible
1102 # solution is to wrap |content| generator with some sort of caching
1103 # restartable generator. It should be done alongside streaming support
1104 # implementation.
1105
1106 # This push operation may be a retry after failed finalization call below,
1107 # no need to reupload contents in that case.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001108 if not push_state.uploaded:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001109 # A cheezy way to avoid memcpy of (possibly huge) file, until streaming
1110 # upload support is implemented.
1111 if isinstance(content, list) and len(content) == 1:
1112 content = content[0]
1113 else:
1114 content = ''.join(content)
1115 # PUT file to |upload_url|.
1116 response = net.url_read(
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001117 url=push_state.upload_url,
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001118 data=content,
1119 content_type='application/octet-stream',
1120 method='PUT')
1121 if response is None:
1122 raise IOError('Failed to upload a file %s to %s' % (
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001123 item.digest, push_state.upload_url))
1124 push_state.uploaded = True
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001125 else:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001126 logging.info(
1127 'A file %s already uploaded, retrying finalization only', item.digest)
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001128
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001129 # Optionally notify the server that it's done.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001130 if push_state.finalize_url:
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001131 # TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
1132 # send it to isolated server. That way isolate server can verify that
1133 # the data safely reached Google Storage (GS provides MD5 and CRC32C of
1134 # stored files).
1135 response = net.url_read(
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001136 url=push_state.finalize_url,
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001137 data='',
1138 content_type='application/json',
1139 method='POST')
1140 if response is None:
1141 raise IOError('Failed to finalize an upload of %s' % item.digest)
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001142 push_state.finalized = True
maruel@chromium.orgd1e20c92013-09-17 20:54:26 +00001143
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001144 def contains(self, items):
1145 logging.info('Checking existence of %d files...', len(items))
maruel@chromium.orgd1e20c92013-09-17 20:54:26 +00001146
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001147 # Ensure all items were initialized with 'prepare' call. Storage does that.
1148 assert all(i.digest is not None and i.size is not None for i in items)
1149
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001150 # Request body is a json encoded list of dicts.
1151 body = [
1152 {
1153 'h': item.digest,
1154 's': item.size,
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001155 'i': int(item.high_priority),
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001156 } for item in items
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001157 ]
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001158
1159 query_url = '%s/content-gs/pre-upload/%s?token=%s' % (
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001160 self._base_url,
1161 self._namespace,
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001162 urllib.quote(self._server_capabilities['access_token']))
1163 response_body = net.url_read(
1164 url=query_url,
1165 data=json.dumps(body, separators=(',', ':')),
1166 content_type='application/json',
1167 method='POST')
1168 if response_body is None:
1169 raise MappingError('Failed to execute /pre-upload query')
1170
1171 # Response body is a list of push_urls (or null if file is already present).
1172 try:
1173 response = json.loads(response_body)
1174 if not isinstance(response, list):
1175 raise ValueError('Expecting response with json-encoded list')
1176 if len(response) != len(items):
1177 raise ValueError(
1178 'Incorrect number of items in the list, expected %d, '
1179 'but got %d' % (len(items), len(response)))
1180 except ValueError as err:
1181 raise MappingError(
1182 'Invalid response from server: %s, body is %s' % (err, response_body))
1183
1184 # Pick Items that are missing, attach _PushState to them.
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001185 missing_items = {}
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001186 for i, push_urls in enumerate(response):
1187 if push_urls:
1188 assert len(push_urls) == 2, str(push_urls)
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001189 missing_items[items[i]] = _IsolateServerPushState(
1190 push_urls[0], push_urls[1])
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001191 logging.info('Queried %d files, %d cache hit',
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001192 len(items), len(items) - len(missing_items))
1193 return missing_items
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001194
1195
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001196class FileSystem(StorageApi):
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001197 """StorageApi implementation that fetches data from the file system.
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001198
1199 The common use case is a NFS/CIFS file server that is mounted locally that is
1200 used to fetch the file on a local partition.
1201 """
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001202
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001203 # Used for push_state instead of None. That way caller is forced to
1204 # call 'contains' before 'push'. Naively passing None in 'push' will not work.
1205 _DUMMY_PUSH_STATE = object()
1206
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001207 def __init__(self, base_path, namespace):
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001208 super(FileSystem, self).__init__()
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001209 self._base_path = base_path
1210 self._namespace = namespace
1211
1212 @property
1213 def location(self):
1214 return self._base_path
1215
1216 @property
1217 def namespace(self):
1218 return self._namespace
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001219
vadimsh@chromium.orgf24e5c32013-10-11 21:16:21 +00001220 def get_fetch_url(self, digest):
1221 return None
1222
Vadim Shtayuraf0cb97a2013-12-05 13:57:49 -08001223 def fetch(self, digest, offset=0):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001224 assert isinstance(digest, basestring)
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001225 return file_read(os.path.join(self._base_path, digest), offset=offset)
maruel@chromium.orge45728d2013-09-16 23:23:22 +00001226
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001227 def push(self, item, push_state, content=None):
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001228 assert isinstance(item, Item)
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001229 assert item.digest is not None
1230 assert item.size is not None
1231 assert push_state is self._DUMMY_PUSH_STATE
1232 content = item.content() if content is None else content
1233 if isinstance(content, basestring):
1234 assert not isinstance(content, unicode), 'Unicode string is not allowed'
1235 content = [content]
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001236 file_write(os.path.join(self._base_path, item.digest), content)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001237
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001238 def contains(self, items):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001239 assert all(i.digest is not None and i.size is not None for i in items)
1240 return dict(
1241 (item, self._DUMMY_PUSH_STATE) for item in items
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001242 if not os.path.exists(os.path.join(self._base_path, item.digest))
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001243 )
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001244
1245
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001246class LocalCache(object):
1247 """Local cache that stores objects fetched via Storage.
1248
1249 It can be accessed concurrently from multiple threads, so it should protect
1250 its internal state with some lock.
1251 """
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -05001252 cache_dir = None
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001253
1254 def __enter__(self):
1255 """Context manager interface."""
1256 return self
1257
1258 def __exit__(self, _exc_type, _exec_value, _traceback):
1259 """Context manager interface."""
1260 return False
1261
1262 def cached_set(self):
1263 """Returns a set of all cached digests (always a new object)."""
1264 raise NotImplementedError()
1265
1266 def touch(self, digest, size):
1267 """Ensures item is not corrupted and updates its LRU position.
1268
1269 Arguments:
1270 digest: hash digest of item to check.
1271 size: expected size of this item.
1272
1273 Returns:
1274 True if item is in cache and not corrupted.
1275 """
1276 raise NotImplementedError()
1277
1278 def evict(self, digest):
1279 """Removes item from cache if it's there."""
1280 raise NotImplementedError()
1281
1282 def read(self, digest):
1283 """Returns contents of the cached item as a single str."""
1284 raise NotImplementedError()
1285
1286 def write(self, digest, content):
1287 """Reads data from |content| generator and stores it in cache."""
1288 raise NotImplementedError()
1289
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001290 def hardlink(self, digest, dest, file_mode):
1291 """Ensures file at |dest| has same content as cached |digest|.
1292
1293 If file_mode is provided, it is used to set the executable bit if
1294 applicable.
1295 """
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001296 raise NotImplementedError()
1297
1298
class MemoryCache(LocalCache):
  """LocalCache implementation that stores everything in memory."""

  def __init__(self, file_mode_mask=0o500):
    """Args:
      file_mode_mask: bit mask to AND file mode with. Default value will make
          all mapped files to be read only.
    """
    # NOTE: the default was written as the legacy octal literal 0500, which is
    # a syntax error under Python 3; 0o500 is the same value (r-x for owner)
    # and is accepted by Python 2.6+ as well.
    super(MemoryCache, self).__init__()
    self._file_mode_mask = file_mode_mask
    # Let's not assume dict is thread safe.
    self._lock = threading.Lock()
    # Maps digest -> item content (str).
    self._contents = {}

  def cached_set(self):
    """Returns a new set of all digests currently held in memory."""
    with self._lock:
      return set(self._contents)

  def touch(self, digest, size):
    """Returns True if |digest| is present; in-memory data is never stale."""
    with self._lock:
      return digest in self._contents

  def evict(self, digest):
    """Removes |digest| from the cache; no-op when it is absent."""
    with self._lock:
      self._contents.pop(digest, None)

  def read(self, digest):
    """Returns contents of the cached item as a single str.

    Raises:
      KeyError if |digest| is not cached.
    """
    with self._lock:
      return self._contents[digest]

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache."""
    # Assemble whole stream before taking the lock.
    data = ''.join(content)
    with self._lock:
      self._contents[digest] = data

  def hardlink(self, digest, dest, file_mode):
    """Since data is kept in memory, there is no filenode to hardlink."""
    file_write(dest, [self.read(digest)])
    if file_mode is not None:
      # Preserve the requested mode, masked down (read-only by default).
      os.chmod(dest, file_mode & self._file_mode_mask)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001340
1341
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001342def get_hash_algo(_namespace):
1343 """Return hash algorithm class to use when uploading to given |namespace|."""
1344 # TODO(vadimsh): Implement this at some point.
1345 return hashlib.sha1
1346
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001347
vadimsh@chromium.org7cdf1c02013-09-25 00:24:16 +00001348def is_namespace_with_compression(namespace):
1349 """Returns True if given |namespace| stores compressed objects."""
1350 return namespace.endswith(('-gzip', '-deflate'))
1351
1352
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001353def get_storage_api(file_or_url, namespace):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001354 """Returns an object that implements low-level StorageApi interface.
1355
1356 It is used by Storage to work with single isolate |namespace|. It should
1357 rarely be used directly by clients, see 'get_storage' for
1358 a better alternative.
1359
1360 Arguments:
1361 file_or_url: a file path to use file system based storage, or URL of isolate
1362 service to use shared cloud based storage.
1363 namespace: isolate namespace to operate in, also defines hashing and
1364 compression scheme used, i.e. namespace names that end with '-gzip'
1365 store compressed data.
1366
1367 Returns:
1368 Instance of StorageApi subclass.
1369 """
Marc-Antoine Ruel37989932013-11-19 16:28:08 -05001370 if file_path.is_url(file_or_url):
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001371 return IsolateServer(file_or_url, namespace)
1372 else:
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001373 return FileSystem(file_or_url, namespace)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001374
1375
def get_storage(file_or_url, namespace):
  """Returns Storage class that can upload and download from |namespace|.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of
        isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of Storage.
  """
  storage_api = get_storage_api(file_or_url, namespace)
  return Storage(storage_api)
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001390
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001391
def expand_symlinks(indir, relfile):
  """Follows symlinks in |relfile|, but treating symlinks that point outside the
  build tree as if they were ordinary directories/files. Returns the final
  symlink-free target and a list of paths to symlinks encountered in the
  process.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory we could support
  that case.

  Arguments:
    indir: root directory of the tree; symlinks resolving outside of it are
        not followed.
    relfile: path relative to |indir| to resolve; a trailing os.path.sep marks
        it as a directory.

  Returns:
    Tuple (relfile, symlinks) where |relfile| is the symlink-free path relative
    to |indir| and |symlinks| is the list of |indir|-relative symlink paths
    that were traversed.

  Raises:
    MappingError: if a symlink target doesn't exist or a recursive symlink
        reference is detected.
  """
  is_directory = relfile.endswith(os.path.sep)
  # Invariant maintained by the loop below: |done| is a symlink-free absolute
  # prefix, |todo| is the remaining relative path still to be scanned.
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
        done, todo)
    if not symlink:
      # No symlink left in |todo|: fix the path case and finish.
      todo = file_path.fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break
    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    target = os.path.normpath(os.path.join(done, pre_symlink))
    symlink_target = os.readlink(symlink_path)
    if os.path.isabs(symlink_target):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = file_path.fix_native_path_case(target, symlink_target)

    if not os.path.exists(target):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not file_path.path_starts_with(indir, target):
      # Target is outside the build tree: treat the symlink itself as an
      # ordinary directory and keep scanning past it.
      done = symlink_path
      todo = post_symlink
      continue
    if file_path.path_starts_with(target, symlink_path):
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))
    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    target = target.split(os.path.sep)
    symlink_path = symlink_path.split(os.path.sep)
    prefix_length = 0
    for target_piece, symlink_path_piece in zip(target, symlink_path):
      if target_piece == symlink_path_piece:
        prefix_length += 1
      else:
        break
    done = os.path.sep.join(target[:prefix_length])
    todo = os.path.join(
        os.path.sep.join(target[prefix_length:]), post_symlink)

  relfile = os.path.relpath(done, indir)
  # Re-append the trailing separator for directories; bool * str yields the
  # separator when is_directory is True and '' otherwise.
  relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
  return relfile, symlinks
1462
1463
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: root directory all outputs are relative to.
    relfile: path relative to |indir| to expand; a trailing os.path.sep marks
        a directory to be expanded recursively.
    blacklist: optional callable taking a relative path and returning True for
        entries to skip during directory expansion. Explicitly listed files
        are included even if blacklisted.
    follow_symlinks: if True, symlinks are resolved via expand_symlinks() and
        included in the output.

  Returns:
    List of paths relative to |indir|, including any traversed symlinks.

  Raises:
    MappingError: on absolute or out-of-tree paths, path case mismatches,
        missing files or trailing-slash inconsistencies.
  """
  if os.path.isabs(relfile):
    raise MappingError('Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + '.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      # ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree. We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    relfile, symlinks = expand_symlinks(indir, relfile)

  if relfile.endswith(os.path.sep):
    if not os.path.isdir(infile):
      raise MappingError(
          '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith('.' + os.path.sep):
      relfile = relfile[2:]
    # Note: |outfiles| aliases |symlinks| on purpose; the traversed symlinks
    # are part of this directory's output.
    outfiles = symlinks
    try:
      for filename in os.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if os.path.isdir(os.path.join(indir, inner_relfile)):
          # Mark nested directories with a trailing separator so the recursive
          # call expands them too.
          inner_relfile += os.path.sep
        outfiles.extend(
            expand_directory_and_symlink(indir, inner_relfile, blacklist,
                                         follow_symlinks))
      return outfiles
    except OSError as e:
      raise MappingError(
          'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise MappingError(
          'Input directory %s must have a trailing slash' % infile)

    if not os.path.isfile(infile):
      raise MappingError('Input file %s doesn\'t exist' % infile)

    return symlinks + [relfile]
1542
1543
def process_input(filepath, prevdict, read_only, algo):
  """Processes an input file, a dependency, and return meta data about it.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calcultate the SHA-1 of the file's
    content if the path points to a file and not a symlink.

  Arguments:
    filepath: File to act on.
    prevdict: the previous dictionary. It is used to retrieve the cached sha-1
              to skip recalculating the hash. Optional.
    read_only: If 1 or 2, the file mode is manipulated. In practice, only save
               one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
               windows, mode is not set since all files are 'executable' by
               default.
    algo:      Hashing algorithm used.

  Returns:
    The necessary data to create a entry in the 'files' section of an .isolated
    file.
  """
  out = {}
  # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
  # if prevdict.get('T') == True:
  #   # The file's content is ignored. Skip the time and hard code mode.
  #   out['s'] = 0
  #   out['h'] = algo().hexdigest()
  #   out['T'] = True
  #   return out

  # lstat() is used (not stat) so symlinks are described, not followed. The
  # timestamp decides whether cached hash / link data in |prevdict| can be
  # reused; a manually reset mtime with changed content defeats this, which is
  # accepted.
  try:
    stats = os.lstat(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(stats.st_mode)

  if sys.platform != 'win32':
    # File mode is not meaningful on Windows, so it is only recorded elsewhere.
    mode = stat.S_IMODE(stats.st_mode)
    # Strip group write and all 'other' permissions.
    mode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      mode &= ~stat.S_IWUSR
    # Mirror the owner's execute bit onto the group.
    if mode & stat.S_IXUSR:
      mode |= stat.S_IXGRP
    else:
      mode &= ~stat.S_IXGRP
    if not is_link:
      out['m'] = mode

  # Most recent update time, used to detect staleness of cached metadata.
  # TODO(maruel): Save it in the .state file instead of .isolated so the
  # .isolated file is deterministic.
  out['t'] = int(round(stats.st_mtime))

  if is_link:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do
      # again here.
      symlink_value = os.readlink(filepath)  # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = file_path.fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  else:
    out['s'] = stats.st_size
    # If neither timestamp nor size changed, reuse the cached sha-1.
    if prevdict.get('t') == out['t'] and prevdict.get('s') == out['s']:
      out['h'] = prevdict.get('h')
    if not out.get('h'):
      out['h'] = hash_file(filepath, algo)
  return out
1633
1634
def save_isolated(isolated, data):
  """Writes one or multiple .isolated files.

  Note: this reference implementation does not create child .isolated file so
  it always returns an empty list.

  Returns the list of child isolated files that are included by |isolated|.
  """
  # Round-trip |data| through the parser so invalid content is rejected before
  # anything is written to disk.
  serialized = json.dumps(data)
  load_isolated(serialized, SUPPORTED_ALGOS[data['algo']])
  tools.write_json(isolated, data, True)
  return []
1648
1649
def upload_tree(base_url, indir, infiles, namespace):
  """Uploads the given tree to the given url.

  Arguments:
    base_url: The base url, it is assume that |base_url|/has/ can be used to
              query if an element was already uploaded, and |base_url|/store/
              can be used to upload a new element.
    indir: Root directory the infiles are based in.
    infiles: dict of files to upload from |indir| to |base_url|.
    namespace: The namespace to use on the server.
  """
  logging.info('upload_tree(indir=%s, files=%d)', indir, len(infiles))

  # Build FileItem objects out of |indir| + |infiles|. Symlinks are skipped:
  # they are not represented by items on the isolate server side.
  items = []
  for filepath, metadata in infiles.iteritems():
    if 'l' in metadata:
      continue
    items.append(
        FileItem(
            path=os.path.join(indir, filepath),
            digest=metadata['h'],
            size=metadata['s'],
            high_priority=metadata.get('priority') == '0'))

  with get_storage(base_url, namespace) as storage:
    storage.upload_items(items)
  return 0
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001679
1680
def load_isolated(content, algo):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server.

  Returns:
    The parsed data as a dict, with file paths normalized to the native
    os.path.sep.

  Raises:
    ConfigError: if the content is not a valid .isolated structure.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  value = data.get('version', '1.0')
  if not isinstance(value, basestring):
    raise ConfigError('Expected string, got %r' % value)
  try:
    version = tuple(map(int, value.split('.')))
  except ValueError:
    raise ConfigError('Expected valid version, got %r' % value)

  expected_version = tuple(map(int, ISOLATED_FILE_VERSION.split('.')))
  # Major version must match.
  if version[0] != expected_version[0]:
    raise ConfigError(
        'Expected compatible \'%s\' version, got %r' %
        (ISOLATED_FILE_VERSION, value))

  if algo is None:
    # TODO(maruel): Remove the default around Jan 2014.
    # Default the algorithm used in the .isolated file itself, falls back to
    # 'sha-1' if unspecified.
    algo = SUPPORTED_ALGOS_REVERSE[data.get('algo', 'sha-1')]

  for key, value in data.iteritems():
    if key == 'algo':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise ConfigError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != SUPPORTED_ALGOS_REVERSE[algo]:
        raise ConfigError(
            'Expected \'%s\', got %r' % (SUPPORTED_ALGOS_REVERSE[algo], value))

    elif key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_valid_hash(subsubvalue, algo):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, (int, long)):
              raise ConfigError('Expected int or long, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        # Each file entry must be either a link ('l') or a regular file
        # carrying both a hash ('h') and a size ('s'), never a mix of the two.
        if ('h' in subvalue) == ('l' in subvalue):
          raise ConfigError(
              'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
              subvalue)
        if ('h' in subvalue) != ('s' in subvalue):
          raise ConfigError(
              'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
              subvalue)
        if ('s' in subvalue) == ('l' in subvalue):
          raise ConfigError(
              'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
              subvalue)
        if 'l' in subvalue and 'm' in subvalue:
          raise ConfigError(
              'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_valid_hash(subvalue, algo):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'os':
      if version >= (1, 4):
        raise ConfigError('Key \'os\' is not allowed starting version 1.4')

    elif key == 'read_only':
      if value not in (0, 1, 2):
        raise ConfigError('Expected 0, 1 or 2, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise ConfigError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are
  # always in the native path format, someone could want to download an
  # .isolated tree from another OS.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].iteritems())
    for v in data['files'].itervalues():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data
1824
1825
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo

    # Becomes True once every .isolated file on the left side of the include
    # tree has been parsed. 'Tree' here means the .isolate and all the
    # .isolated files recursively included by it with the 'includes' key. The
    # order of the sha-1s in 'includes' matters: later ones are not processed
    # until the earlier ones are retrieved and read.
    self.can_fetch = False

    # Raw parsed json content of the .isolated file.
    self.data = {}
    # One IsolatedFile instance per entry of self.data['includes'].
    self.children = []

    # Becomes True once load() has parsed the content.
    self._is_parsed = False
    # Becomes True once fetch_files() has enqueued this file's dependencies.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, self.algo)
    self.children = []
    for included_hash in self.data.get('includes', []):
      self.children.append(IsolatedFile(included_hash, self.algo))
    self._is_parsed = True

  def fetch_files(self, fetch_queue, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if not self._is_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        fetch_queue.add(properties['h'], properties['s'], WorkerPool.MED)
    self.files_fetched = True
1883
1884
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command to run, aggregated from the 'command' key; first node wins.
    self.command = []
    # Mapping of relative file path -> properties dict, aggregated over the
    # whole include tree; entries already set are never overwritten.
    self.files = {}
    # Value of the 'read_only' key from the first node that defines it.
    self.read_only = None
    # Value of the 'relative_cwd' key from the first node that defines it.
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, fetch_queue, root_isolated_hash, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.
    """
    self.root = IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      # Enqueues |isolated_file| for download at the highest priority.
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      fetch_queue.add(h, priority=WorkerPool.HIGH)

    retrieve(self.root)

    while pending:
      # Process whichever pending .isolated file finished downloading first.
      item_hash = fetch_queue.wait(pending)
      item = pending.pop(item_hash)
      item.load(fetch_queue.cache.read(item_hash))
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(fetch_queue, self.root)

    def check(n):
      # Sanity check: every node in the tree must have had its files fetched.
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''

  def _traverse_tree(self, fetch_queue, node):
    """Fetches files of every fetchable node and unblocks, per node, the
    left-most child that is not yet marked can_fetch, preserving the strict
    left-to-right 'includes' ordering.
    """
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(fetch_queue, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetchable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(fetch_queue, i)

  def _update_self(self, fetch_queue, node):
    """Enqueues |node|'s files and merges its properties into self; for
    command/read_only/relative_cwd the first node to define a value wins.
    """
    node.fetch_files(fetch_queue, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      # Ensure paths are correctly separated on windows.
      self.command = node.data['command']
      if self.command:
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1979
1980
def fetch_isolated(isolated_hash, storage, cache, outdir, require_command):
  """Aggressively downloads the .isolated file(s), then download all the files.

  Arguments:
    isolated_hash: hash of the root *.isolated file.
    storage: Storage class that communicates with isolate storage.
    cache: LocalCache class that knows how to store and map files locally.
    outdir: Output directory to map file tree to.
    require_command: Ensure *.isolated specifies a command to run.

  Returns:
    Settings object that holds details about loaded *.isolated file.

  Raises:
    ConfigError: if |require_command| is True and no command is specified.
    MappingError: if the input is neither a valid hash nor a readable file, or
        if the cache evicted items that were just fetched.
  """
  logging.debug(
      'fetch_isolated(%s, %s, %s, %s, %s)',
      isolated_hash, storage, cache, outdir, require_command)
  # Hash algorithm to use, defined by namespace |storage| is using.
  algo = storage.hash_algo
  with cache:
    fetch_queue = FetchQueue(storage, cache)
    settings = Settings()

    with tools.Profiler('GetIsolateds'):
      # Optionally support local files by manually adding them to cache.
      if not is_valid_hash(isolated_hash, algo):
        logging.debug('%s is not a valid hash, assuming a file', isolated_hash)
        try:
          isolated_hash = fetch_queue.inject_local_file(isolated_hash, algo)
        except IOError:
          # Typo fixed: 'intent' -> 'intend'.
          raise MappingError(
              '%s doesn\'t seem to be a valid file. Did you intend to pass a '
              'valid hash?' % isolated_hash)

      # Load all *.isolated and start loading rest of the files.
      settings.load(fetch_queue, isolated_hash, algo)
      if require_command and not settings.command:
        # TODO(vadimsh): All fetch operations are already enqueue and there's
        # no easy way to cancel them.
        raise ConfigError('No command to run')

    with tools.Profiler('GetRest'):
      # Create file system hierarchy.
      if not os.path.isdir(outdir):
        os.makedirs(outdir)
      create_directories(outdir, settings.files)
      create_symlinks(outdir, settings.files.iteritems())

      # Ensure working directory exists.
      cwd = os.path.normpath(os.path.join(outdir, settings.relative_cwd))
      if not os.path.isdir(cwd):
        os.makedirs(cwd)

      # Multimap: digest -> list of pairs (path, props).
      remaining = {}
      for filepath, props in settings.files.iteritems():
        if 'h' in props:
          remaining.setdefault(props['h'], []).append((filepath, props))

      # Now block on the remaining files to be downloaded and mapped.
      logging.info('Retrieving remaining files (%d of them)...',
                   fetch_queue.pending_count)
      last_update = time.time()
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        while remaining:
          detector.ping()

          # Wait for any item to finish fetching to cache.
          digest = fetch_queue.wait(remaining)

          # Link corresponding files to a fetched item in cache.
          for filepath, props in remaining.pop(digest):
            cache.hardlink(
                digest, os.path.join(outdir, filepath), props.get('m'))

          # Report progress.
          duration = time.time() - last_update
          if duration > DELAY_BETWEEN_UPDATES_IN_SECS:
            msg = '%d files remaining...' % len(remaining)
            print(msg)
            logging.info(msg)
            last_update = time.time()

    # Cache could evict some items we just tried to fetch, it's a fatal error.
    if not fetch_queue.verify_all_cached():
      raise MappingError('Cache is too small to hold all requested files')
  return settings
2067
2068
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002069def directory_to_metadata(root, algo, blacklist):
2070 """Returns the FileItem list and .isolated metadata for a directory."""
2071 root = file_path.get_native_path_case(root)
Vadim Shtayura439d3fc2014-05-07 16:05:12 -07002072 paths = expand_directory_and_symlink(
2073 root, '.' + os.path.sep, blacklist, sys.platform != 'win32')
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002074 metadata = dict(
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04002075 (relpath, process_input(os.path.join(root, relpath), {}, False, algo))
Vadim Shtayura439d3fc2014-05-07 16:05:12 -07002076 for relpath in paths
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002077 )
2078 for v in metadata.itervalues():
2079 v.pop('t')
2080 items = [
2081 FileItem(
2082 path=os.path.join(root, relpath),
2083 digest=meta['h'],
2084 size=meta['s'],
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08002085 high_priority=relpath.endswith('.isolated'))
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002086 for relpath, meta in metadata.iteritems() if 'h' in meta
2087 ]
2088 return items, metadata
2089
2090
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002091def archive_files_to_storage(storage, files, blacklist):
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -05002092 """Stores every entries and returns the relevant data.
2093
2094 Arguments:
2095 storage: a Storage object that communicates with the remote object store.
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -05002096 files: list of file paths to upload. If a directory is specified, a
2097 .isolated file is created and its hash is returned.
2098 blacklist: function that returns True if a file should be omitted.
2099 """
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002100 assert all(isinstance(i, unicode) for i in files), files
2101 if len(files) != len(set(map(os.path.abspath, files))):
2102 raise Error('Duplicate entries found.')
2103
2104 results = []
2105 # The temporary directory is only created as needed.
2106 tempdir = None
2107 try:
2108 # TODO(maruel): Yield the files to a worker thread.
2109 items_to_upload = []
2110 for f in files:
2111 try:
2112 filepath = os.path.abspath(f)
2113 if os.path.isdir(filepath):
2114 # Uploading a whole directory.
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002115 items, metadata = directory_to_metadata(
2116 filepath, storage.hash_algo, blacklist)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002117
2118 # Create the .isolated file.
2119 if not tempdir:
2120 tempdir = tempfile.mkdtemp(prefix='isolateserver')
2121 handle, isolated = tempfile.mkstemp(dir=tempdir, suffix='.isolated')
2122 os.close(handle)
2123 data = {
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002124 'algo': SUPPORTED_ALGOS_REVERSE[storage.hash_algo],
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002125 'files': metadata,
Marc-Antoine Ruel1c1edd62013-12-06 09:13:13 -05002126 'version': ISOLATED_FILE_VERSION,
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002127 }
2128 save_isolated(isolated, data)
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002129 h = hash_file(isolated, storage.hash_algo)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002130 items_to_upload.extend(items)
2131 items_to_upload.append(
2132 FileItem(
2133 path=isolated,
2134 digest=h,
2135 size=os.stat(isolated).st_size,
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08002136 high_priority=True))
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002137 results.append((h, f))
2138
2139 elif os.path.isfile(filepath):
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002140 h = hash_file(filepath, storage.hash_algo)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002141 items_to_upload.append(
2142 FileItem(
2143 path=filepath,
2144 digest=h,
2145 size=os.stat(filepath).st_size,
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08002146 high_priority=f.endswith('.isolated')))
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002147 results.append((h, f))
2148 else:
2149 raise Error('%s is neither a file or directory.' % f)
2150 except OSError:
2151 raise Error('Failed to process %s.' % f)
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -05002152 # Technically we would care about which files were uploaded but we don't
2153 # much in practice.
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002154 _uploaded_files = storage.upload_items(items_to_upload)
2155 return results
2156 finally:
2157 if tempdir:
2158 shutil.rmtree(tempdir)
2159
2160
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002161def archive(out, namespace, files, blacklist):
2162 if files == ['-']:
2163 files = sys.stdin.readlines()
2164
2165 if not files:
2166 raise Error('Nothing to upload')
2167
2168 files = [f.decode('utf-8') for f in files]
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002169 blacklist = tools.gen_blacklist(blacklist)
2170 with get_storage(out, namespace) as storage:
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07002171 results = archive_files_to_storage(storage, files, blacklist)
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002172 print('\n'.join('%s %s' % (r[0], r[1]) for r in results))
2173
2174
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002175@subcommand.usage('<file1..fileN> or - to read from stdin')
2176def CMDarchive(parser, args):
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002177 """Archives data to the server.
2178
2179 If a directory is specified, a .isolated file is created the whole directory
2180 is uploaded. Then this .isolated file can be included in another one to run
2181 commands.
2182
2183 The commands output each file that was processed with its content hash. For
2184 directories, the .isolated generated for the directory is listed as the
2185 directory entry itself.
2186 """
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002187 add_isolate_server_options(parser, False)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002188 parser.add_option(
2189 '--blacklist',
2190 action='append', default=list(DEFAULT_BLACKLIST),
2191 help='List of regexp to use as blacklist filter when uploading '
2192 'directories')
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +00002193 options, files = parser.parse_args(args)
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002194 process_isolate_server_options(parser, options)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002195 try:
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002196 archive(options.isolate_server, options.namespace, files, options.blacklist)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002197 except Error as e:
2198 parser.error(e.args[0])
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05002199 return 0
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002200
2201
def CMDdownload(parser, args):
  """Download data from the server.

  It can either download individual files or a complete tree from a .isolated
  file.
  """
  add_isolate_server_options(parser, True)
  parser.add_option(
      '-i', '--isolated', metavar='HASH',
      help='hash of an isolated file, .isolated file content is discarded, use '
           '--file if you need it')
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default=os.getcwd(),
      help='destination directory')
  options, args = parser.parse_args(args)
  process_isolate_server_options(parser, options)
  if args:
    parser.error('Unsupported arguments: %s' % args)
  # Exactly one of the two modes must be selected.
  if bool(options.isolated) == bool(options.file):
    parser.error('Use one of --isolated or --file, and only one.')

  options.target = os.path.abspath(options.target)

  # The storage abstraction supports both a real isolate server and a local
  # file system hashtable (--indir).
  remote = options.isolate_server or options.indir
  with get_storage(remote, options.namespace) as storage:
    # Fetching individual files.
    if options.file:
      channel = threading_utils.TaskChannel()
      pending = {}
      for digest, dest in options.file:
        pending[digest] = dest
        storage.async_fetch(
            channel,
            WorkerPool.MED,
            digest,
            UNKNOWN_FILE_SIZE,
            functools.partial(file_write, os.path.join(options.target, dest)))
      # Drain the channel until every requested digest has been written out.
      while pending:
        fetched = channel.pull()
        dest = pending.pop(fetched)
        logging.info('%s: %s', fetched, dest)

    # Fetching whole isolated tree.
    if options.isolated:
      settings = fetch_isolated(
          isolated_hash=options.isolated,
          storage=storage,
          cache=MemoryCache(),
          outdir=options.target,
          require_command=False)
      # |rel| is already rooted at options.target; the previous code joined
      # options.target with it a second time, which only worked by accident
      # because os.path.join() discards its first argument when the second
      # one is absolute.
      rel = os.path.join(options.target, settings.relative_cwd)
      print('To run this test please run from the directory %s:' % rel)
      print(' ' + ' '.join(settings.command))

  return 0
2261
2262
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002263@subcommand.usage('<file1..fileN> or - to read from stdin')
2264def CMDhashtable(parser, args):
2265 """Archives data to a hashtable on the file system.
2266
2267 If a directory is specified, a .isolated file is created the whole directory
2268 is uploaded. Then this .isolated file can be included in another one to run
2269 commands.
2270
2271 The commands output each file that was processed with its content hash. For
2272 directories, the .isolated generated for the directory is listed as the
2273 directory entry itself.
2274 """
2275 add_outdir_options(parser)
2276 parser.add_option(
2277 '--blacklist',
2278 action='append', default=list(DEFAULT_BLACKLIST),
2279 help='List of regexp to use as blacklist filter when uploading '
2280 'directories')
2281 options, files = parser.parse_args(args)
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002282 process_outdir_options(parser, options, os.getcwd())
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002283 try:
2284 # Do not compress files when archiving to the file system.
2285 archive(options.outdir, 'default', files, options.blacklist)
2286 except Error as e:
2287 parser.error(e.args[0])
2288 return 0
2289
2290
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002291def add_isolate_server_options(parser, add_indir):
2292 """Adds --isolate-server and --namespace options to parser.
2293
2294 Includes --indir if desired.
2295 """
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002296 parser.add_option(
2297 '-I', '--isolate-server',
2298 metavar='URL', default=os.environ.get('ISOLATE_SERVER', ''),
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002299 help='URL of the Isolate Server to use. Defaults to the environment '
2300 'variable ISOLATE_SERVER if set. No need to specify https://, this '
2301 'is assumed.')
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002302 parser.add_option(
2303 '--namespace', default='default-gzip',
2304 help='The namespace to use on the Isolate Server, default: %default')
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002305 if add_indir:
2306 parser.add_option(
2307 '--indir', metavar='DIR',
2308 help='Directory used to store the hashtable instead of using an '
2309 'isolate server.')
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002310
2311
def process_isolate_server_options(parser, options):
  """Processes the --isolate-server and --indir options and aborts if neither is
  specified.

  Mutates options in place: normalizes options.isolate_server into a clean
  'scheme://netloc' URL, or normalizes options.indir into a native absolute
  path. Calls parser.error() (which exits) on any invalid combination.
  """
  # --indir only exists when add_isolate_server_options() was called with
  # add_indir=True; detect its presence rather than assuming it.
  has_indir = hasattr(options, 'indir')
  if not options.isolate_server:
    if not has_indir:
      parser.error('--isolate-server is required.')
    elif not options.indir:
      parser.error('Use one of --indir or --isolate-server.')
  else:
    if has_indir and options.indir:
      parser.error('Use only one of --indir or --isolate-server.')

  if options.isolate_server:
    # 'https' is the default scheme when none is given on the command line.
    parts = urlparse.urlparse(options.isolate_server, 'https')
    if parts.query:
      parser.error('--isolate-server doesn\'t support query parameter.')
    if parts.fragment:
      parser.error('--isolate-server doesn\'t support fragment in the url.')
    # urlparse('foo.com') will result in netloc='', path='foo.com', which is not
    # what is desired here.
    # Indices into the 6-tuple: [1] is netloc, [2] is path.
    new = list(parts)
    if not new[1] and new[2]:
      # Promote the bare hostname from path to netloc.
      new[1] = new[2].rstrip('/')
      new[2] = ''
    new[2] = new[2].rstrip('/')
    options.isolate_server = urlparse.urlunparse(new)
    return

  if file_path.is_url(options.indir):
    parser.error('Can\'t use an URL for --indir.')
  # Normalize to the OS native separator and an absolute path.
  options.indir = unicode(options.indir).replace('/', os.path.sep)
  options.indir = os.path.abspath(
      os.path.normpath(os.path.join(os.getcwd(), options.indir)))
  if not os.path.isdir(options.indir):
    parser.error('Path given to --indir must exist.')
2349
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002350
2351
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002352def add_outdir_options(parser):
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002353 """Adds --outdir, which is orthogonal to --isolate-server.
2354
2355 Note: On upload, separate commands are used between 'archive' and 'hashtable'.
2356 On 'download', the same command can download from either an isolate server or
2357 a file system.
2358 """
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002359 parser.add_option(
2360 '-o', '--outdir', metavar='DIR',
2361 help='Directory used to recreate the tree.')
2362
2363
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002364def process_outdir_options(parser, options, cwd):
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002365 if not options.outdir:
2366 parser.error('--outdir is required.')
2367 if file_path.is_url(options.outdir):
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -05002368 parser.error('Can\'t use an URL for --outdir.')
Marc-Antoine Ruel488ce8f2014-02-09 11:25:04 -05002369 options.outdir = unicode(options.outdir).replace('/', os.path.sep)
2370 # outdir doesn't need native path case since tracing is never done from there.
2371 options.outdir = os.path.abspath(
2372 os.path.normpath(os.path.join(cwd, options.outdir)))
2373 # In theory, we'd create the directory outdir right away. Defer doing it in
2374 # case there's errors in the command line.
2375
2376
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002377class OptionParserIsolateServer(tools.OptionParserWithLogging):
2378 def __init__(self, **kwargs):
Marc-Antoine Ruelac54cb42013-11-18 14:05:35 -05002379 tools.OptionParserWithLogging.__init__(
2380 self,
2381 version=__version__,
2382 prog=os.path.basename(sys.modules[__name__].__file__),
2383 **kwargs)
Vadim Shtayurae34e13a2014-02-02 11:23:26 -08002384 auth.add_auth_options(self)
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002385
2386 def parse_args(self, *args, **kwargs):
2387 options, args = tools.OptionParserWithLogging.parse_args(
2388 self, *args, **kwargs)
Vadim Shtayura5d1efce2014-02-04 10:55:43 -08002389 auth.process_auth_options(self, options)
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002390 return options, args
2391
2392
def main(args):
  """Dispatches to the requested subcommand; returns the process exit code."""
  try:
    dispatcher = subcommand.CommandDispatcher(__name__)
    return dispatcher.execute(OptionParserIsolateServer(), args)
  except Exception as e:
    # Top-level boundary: report the error and fail instead of tracebacking.
    tools.report_error(e)
    return 1
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00002400
2401
if __name__ == '__main__':
  # Normalize stdin/stdout/argv encoding before any output happens.
  fix_encoding.fix_encoding()
  # Unbuffered output so progress messages appear immediately.
  tools.disable_buffering()
  # Enable ANSI color handling (required for colors on Windows consoles).
  colorama.init()
  sys.exit(main(sys.argv[1:]))