blob: 889a55ec55a0d06836d1483129339cdd94d0e267 [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00005
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05006"""Archives a set of files or directories to a server."""
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00007
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -04008__version__ = '0.3.4'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +000010import functools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000011import hashlib
maruel@chromium.org41601642013-09-18 19:40:46 +000012import json
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000013import logging
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000014import os
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000015import re
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050016import shutil
17import stat
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000018import sys
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050019import tempfile
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000020import threading
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000021import time
maruel@chromium.orge82112e2013-04-24 14:41:55 +000022import urllib
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -050023import urlparse
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000024import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000025
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000026from third_party import colorama
27from third_party.depot_tools import fix_encoding
28from third_party.depot_tools import subcommand
29
Marc-Antoine Ruel37989932013-11-19 16:28:08 -050030from utils import file_path
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000031from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040032from utils import on_error
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000033from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000034from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000035
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080036import auth
37
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000038
# Version of isolate protocol passed to the server in /handshake request.
ISOLATE_PROTOCOL_VERSION = '1.0'
# Version stored and expected in .isolated files.
ISOLATED_FILE_VERSION = '1.4'


# The number of files to check the isolate server per /pre-upload query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: larger the file -> larger the
# possibility it has changed). Then first ITEMS_PER_CONTAINS_QUERIES[0] files
# are taken and send to '/pre-upload', then next ITEMS_PER_CONTAINS_QUERIES[1],
# and so on. Numbers here is a trade-off; the more per request, the lower the
# effect of HTTP round trip latency and TCP-level chattiness. On the other hand,
# larger values cause longer lookups, increasing the initial latency to start
# uploading, which is especially an issue for large files. This value is
# optimized for the "few thousands files to look up with minimal number of large
# files missing" case.
ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]


# A list of already compressed extension types that should not receive any
# compression before being uploaded.
# NOTE(review): these are bare extensions without a leading dot; callers that
# compare against os.path.splitext() output must strip the dot first.
ALREADY_COMPRESSED_TYPES = [
    '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'mp4', 'pdf',
    'png', 'wav', 'zip',
]


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# Chunk size to use when doing disk I/O (1 MiB).
DISK_FILE_CHUNK = 1024 * 1024

# Chunk size to use when reading from network stream (16 KiB).
NET_IO_FILE_CHUNK = 16 * 1024


# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout whole download will be aborted.
DOWNLOAD_READ_TIMEOUT = 60

# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30


# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
# specify the names here.
SUPPORTED_ALGOS = {
  'md5': hashlib.md5,
  'sha-1': hashlib.sha1,
  'sha-512': hashlib.sha512,
}


# Used for serialization: maps a hashlib constructor back to its wire name.
SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.iteritems())


# Regexps of files never archived by default.
DEFAULT_BLACKLIST = (
  # Temporary vim or python files.
  r'^.+\.(?:pyc|swp)$',
  # .git or .svn directory.
  r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
)


# Chromium-specific.
DEFAULT_BLACKLIST += (
  r'^.+\.(?:run_test_cases)$',
  r'^(?:.+' + re.escape(os.path.sep) + r'|)testserver\.log$',
)
121
122
class Error(Exception):
  """Generic runtime error raised by this script."""
126
127
class ConfigError(ValueError):
  """Generic failure to load a .isolated file."""
132
class MappingError(OSError):
  """Failed to recreate the tree."""
137
def is_valid_hash(value, algo):
  """Returns if the value is a valid hash for the corresponding algorithm."""
  # A valid digest is exactly 2 hex characters per digest byte.
  expected_len = 2 * algo().digest_size
  pattern = r'^[a-fA-F0-9]{%d}$' % expected_len
  return bool(re.match(pattern, value))
142
143
def hash_file(filepath, algo):
  """Calculates the hash of a file without reading it all in memory at once.

  |algo| should be one of hashlib hashing algorithm.
  """
  hasher = algo()
  with open(filepath, 'rb') as stream:
    # Feed the hasher one disk-sized chunk at a time to bound memory usage.
    chunk = stream.read(DISK_FILE_CHUNK)
    while chunk:
      hasher.update(chunk)
      chunk = stream.read(DISK_FILE_CHUNK)
  return hasher.hexdigest()
157
158
def stream_read(stream, chunk_size):
  """Reads chunks from |stream| and yields them."""
  # Stop as soon as the stream reports EOF (an empty read).
  data = stream.read(chunk_size)
  while data:
    yield data
    data = stream.read(chunk_size)
166
167
def file_read(filepath, chunk_size=DISK_FILE_CHUNK, offset=0):
  """Yields file content in chunks of |chunk_size| starting from |offset|."""
  with open(filepath, 'rb') as stream:
    # Skip straight to the requested position before streaming.
    if offset:
      stream.seek(offset)
    data = stream.read(chunk_size)
    while data:
      yield data
      data = stream.read(chunk_size)
178
179
def file_write(filepath, content_generator):
  """Writes file content as generated by content_generator.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  # Make sure the parent directory exists before opening the file.
  parent = os.path.dirname(filepath)
  if not os.path.isdir(parent):
    os.makedirs(parent)
  written = 0
  with open(filepath, 'wb') as out:
    for chunk in content_generator:
      written += len(chunk)
      out.write(chunk)
  return written
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000198
199
def zip_compress(content_generator, level=7):
  """Reads chunks from |content_generator| and yields zip compressed chunks."""
  compressor = zlib.compressobj(level)
  for piece in content_generator:
    # compress() may buffer internally and return nothing for small inputs.
    out = compressor.compress(piece)
    if out:
      yield out
  # Emit whatever zlib still holds plus the stream terminator.
  remainder = compressor.flush(zlib.Z_FINISH)
  if remainder:
    yield remainder
210
211
def zip_decompress(content_generator, chunk_size=DISK_FILE_CHUNK):
  """Reads zipped data from |content_generator| and yields decompressed data.

  Decompresses data in small chunks (no larger than |chunk_size|) so that
  zip bomb file doesn't cause zlib to preallocate huge amount of memory.

  Raises IOError if data is corrupted or incomplete.
  """
  decompressor = zlib.decompressobj()
  compressed_size = 0
  try:
    for chunk in content_generator:
      compressed_size += len(chunk)
      # Second argument caps the output size; any input that would produce
      # more output is left in decompressor.unconsumed_tail.
      data = decompressor.decompress(chunk, chunk_size)
      if data:
        yield data
      # Drain the leftover input in |chunk_size|-bounded increments.
      while decompressor.unconsumed_tail:
        data = decompressor.decompress(decompressor.unconsumed_tail, chunk_size)
        if data:
          yield data
    # Emit whatever zlib still buffers internally at end of stream.
    tail = decompressor.flush()
    if tail:
      yield tail
  except zlib.error as e:
    raise IOError(
        'Corrupted zip stream (read %d bytes) - %s' % (compressed_size, e))
  # Ensure all data was read and decompressed.
  if decompressor.unused_data or decompressor.unconsumed_tail:
    raise IOError('Not all data was decompressed')
241
242
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (store, no compression) for file types that are already
  compressed, 7 otherwise.
  """
  # BUG FIX: os.path.splitext() keeps the leading dot ('.zip') while
  # ALREADY_COMPRESSED_TYPES lists bare extensions ('zip'), so the previous
  # membership test never matched and already-compressed files were
  # recompressed at level 7. Strip the dot before comparing.
  file_ext = os.path.splitext(filename)[1].lower().lstrip('.')
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
248
249
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files."""
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect every ancestor directory of every file, walking up to the root.
  directories = set()
  for filepath in files:
    ancestor = os.path.dirname(filepath)
    while ancestor and ancestor not in directories:
      directories.add(ancestor)
      ancestor = os.path.dirname(ancestor)
  # Sorting guarantees parents are created before their children.
  for directory in sorted(directories):
    os.mkdir(os.path.join(base_directory, directory))
262
263
def create_symlinks(base_directory, files):
  """Creates any symlinks needed by the given set of files."""
  on_windows = sys.platform == 'win32'
  for relpath, properties in files:
    # Only entries carrying an 'l' (link target) property are symlinks.
    if 'l' not in properties:
      continue
    if on_windows:
      # TODO(maruel): Create symlink via the win32 api.
      logging.warning('Ignoring symlink %s', relpath)
      continue
    destination = os.path.join(base_directory, relpath)
    # os.symlink() doesn't exist on Windows.
    os.symlink(properties['l'], destination)  # pylint: disable=E1101
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000276
277
def is_valid_file(filepath, size):
  """Determines if the given files appears valid.

  Currently it just checks the file's size.
  """
  # When the size is unknown, existence is the only check we can do.
  if size == UNKNOWN_FILE_SIZE:
    return os.path.isfile(filepath)
  actual_size = os.stat(filepath).st_size
  if size == actual_size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), actual_size, size)
  return False
292
293
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """Thread pool that automatically retries on IOError and runs a preconfigured
  function.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Number of times a task is retried (on IOError) before giving up.
  RETRIES = 5

  def __init__(self):
    # Positional arguments are: exceptions to retry on, retry count, initial
    # workers, max workers, queue size, thread name prefix -- assumed from
    # AutoRetryThreadPool; TODO confirm against utils/threading_utils.
    super(WorkerPool, self).__init__(
        [IOError],
        self.RETRIES,
        self.INITIAL_WORKERS,
        self.MAX_WORKERS,
        0,
        'remote')
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000311
312
class Item(object):
  """An item to push to Storage.

  Its digest and size may be provided in advance, if known. Otherwise they will
  be derived from content(). If digest is provided, it MUST correspond to
  hash algorithm used by Storage.

  When used with Storage, Item starts its life in a main thread, travels
  to 'contains' thread, then to 'push' thread and then finally back to
  the main thread. It is never used concurrently from multiple threads.
  """

  def __init__(self, digest=None, size=None, high_priority=False):
    self.digest = digest
    self.size = size
    self.high_priority = high_priority
    self.compression_level = 6

  def content(self):
    """Iterable with content of this item as byte string (str) chunks."""
    raise NotImplementedError()

  def prepare(self, hash_algo):
    """Ensures self.digest and self.size are set.

    Uses content() as a source of data to calculate them. Does nothing if digest
    and size is already known.

    Arguments:
      hash_algo: hash algorithm to use to calculate digest.
    """
    # Nothing to do when both values were supplied upfront.
    if self.digest is not None and self.size is not None:
      return
    hasher = hash_algo()
    size = 0
    # Single pass over the content computes both digest and total size.
    for chunk in self.content():
      hasher.update(chunk)
      size += len(chunk)
    self.digest = hasher.hexdigest()
    self.size = size
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000352
353
class FileItem(Item):
  """A file to push to Storage.

  Its digest and size may be provided in advance, if known. Otherwise they will
  be derived from the file content.
  """

  def __init__(self, path, digest=None, size=None, high_priority=False):
    # Fall back to the on-disk size when the caller doesn't know it.
    actual_size = os.stat(path).st_size if size is None else size
    super(FileItem, self).__init__(digest, actual_size, high_priority)
    self.path = path
    self.compression_level = get_zip_compression_level(path)

  def content(self):
    # Stream the file from disk lazily, one chunk at a time.
    return file_read(self.path)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000371
372
class BufferItem(Item):
  """A byte buffer to push to Storage."""

  def __init__(self, buf, high_priority=False):
    # Size is known upfront (length of the buffer); digest is left to be
    # computed lazily by Item.prepare().
    super(BufferItem, self).__init__(None, len(buf), high_priority)
    self.buffer = buf

  def content(self):
    # The whole buffer is already in memory: return it as a single chunk.
    return [self.buffer]
382
383
class Storage(object):
  """Efficiently downloads or uploads large set of files via StorageApi.

  Implements compression support, parallel 'contains' checks, parallel uploads
  and more.

  Works only within single namespace (and thus hashing algorithm and compression
  scheme are fixed).

  Spawns multiple internal threads. Thread safe, but not fork safe.
  """

  def __init__(self, storage_api):
    # Low-level transport (e.g. IsolateServer or FileSystem implementation).
    self._storage_api = storage_api
    # Whether content must be zipped before upload, derived from the namespace.
    self._use_zip = is_namespace_with_compression(storage_api.namespace)
    # Hash constructor used to derive item digests, derived from the namespace.
    self._hash_algo = get_hash_algo(storage_api.namespace)
    # Thread pools are created lazily by the properties below.
    self._cpu_thread_pool = None
    self._net_thread_pool = None

  @property
  def hash_algo(self):
    """Hashing algorithm used to name files in storage based on their content.

    Defined by |namespace|. See also 'get_hash_algo'.
    """
    return self._hash_algo

  @property
  def location(self):
    """Location of a backing store that this class is using.

    Exact meaning depends on the storage_api type. For IsolateServer it is
    an URL of isolate server, for FileSystem is it a path in file system.
    """
    return self._storage_api.location

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    return self._storage_api.namespace

  @property
  def cpu_thread_pool(self):
    """ThreadPool for CPU-bound tasks like zipping. Created on first use."""
    if self._cpu_thread_pool is None:
      self._cpu_thread_pool = threading_utils.ThreadPool(
          2, max(threading_utils.num_processors(), 2), 0, 'zip')
    return self._cpu_thread_pool

  @property
  def net_thread_pool(self):
    """AutoRetryThreadPool for IO-bound tasks, retries IOError."""
    if self._net_thread_pool is None:
      self._net_thread_pool = WorkerPool()
    return self._net_thread_pool

  def close(self):
    """Waits for all pending tasks to finish."""
    # join() waits for queued work to drain before close() tears threads down.
    if self._cpu_thread_pool:
      self._cpu_thread_pool.join()
      self._cpu_thread_pool.close()
      self._cpu_thread_pool = None
    if self._net_thread_pool:
      self._net_thread_pool.join()
      self._net_thread_pool.close()
      self._net_thread_pool = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exc_value, _traceback):
    """Context manager interface."""
    self.close()
    # Returning False propagates any exception raised inside the 'with' block.
    return False

  def upload_items(self, items):
    """Uploads a bunch of items to the isolate server.

    It figures out what items are missing from the server and uploads only them.

    Arguments:
      items: list of Item instances that represents data to upload.

    Returns:
      List of items that were uploaded. All other items are already there.
    """
    # TODO(vadimsh): Optimize special case of len(items) == 1 that is frequently
    # used by swarming.py. There's no need to spawn multiple threads and try to
    # do stuff in parallel: there's nothing to parallelize. 'contains' check and
    # 'push' should be performed sequentially in the context of current thread.

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # For each digest keep only first Item that matches it. All other items
    # are just indistinguishable copies from the point of view of isolate
    # server (it doesn't care about paths at all, only content and digests).
    seen = {}
    duplicates = 0
    for item in items:
      if seen.setdefault(item.digest, item) is not item:
        duplicates += 1
    items = seen.values()
    if duplicates:
      logging.info('Skipped %d duplicated files', duplicates)

    # Enqueue all upload tasks.
    missing = set()
    uploaded = []
    channel = threading_utils.TaskChannel()
    for missing_item, push_state in self.get_missing_items(items):
      missing.add(missing_item)
      self.async_push(channel, missing_item, push_state)

    # No need to spawn deadlock detector thread if there's nothing to upload.
    if missing:
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        # Wait for all started uploads to finish.
        while len(uploaded) != len(missing):
          detector.ping()
          item = channel.pull()
          uploaded.append(item)
          logging.debug(
              'Uploaded %d / %d: %s', len(uploaded), len(missing), item.digest)
      logging.info('All files are uploaded')

    # Print stats.
    total = len(items)
    total_size = sum(f.size for f in items)
    logging.info(
        'Total: %6d, %9.1fkb',
        total,
        total_size / 1024.)
    cache_hit = set(items) - missing
    cache_hit_size = sum(f.size for f in cache_hit)
    logging.info(
        'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_hit),
        cache_hit_size / 1024.,
        len(cache_hit) * 100. / total,
        cache_hit_size * 100. / total_size if total_size else 0)
    cache_miss = missing
    cache_miss_size = sum(f.size for f in cache_miss)
    logging.info(
        'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_miss),
        cache_miss_size / 1024.,
        len(cache_miss) * 100. / total,
        cache_miss_size * 100. / total_size if total_size else 0)

    return uploaded

  def get_fetch_url(self, item):
    """Returns an URL that can be used to fetch given item once it's uploaded.

    Note that if namespace uses compression, data at given URL is compressed.

    Arguments:
      item: Item to get fetch URL for.

    Returns:
      An URL or None if underlying protocol doesn't support this.
    """
    item.prepare(self._hash_algo)
    return self._storage_api.get_fetch_url(item.digest)

  def async_push(self, channel, item, push_state):
    """Starts asynchronous push to the server in a parallel thread.

    Can be used only after |item| was checked for presence on a server with
    'get_missing_items' call. 'get_missing_items' returns |push_state| object
    that contains storage specific information describing how to upload
    the item (for example in case of cloud storage, it is signed upload URLs).

    Arguments:
      channel: TaskChannel that receives back |item| when upload ends.
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      None, but |channel| later receives back |item| when upload ends.
    """
    # Thread pool task priority.
    priority = WorkerPool.HIGH if item.high_priority else WorkerPool.MED

    def push(content):
      """Pushes an Item and returns it to |channel|."""
      item.prepare(self._hash_algo)
      self._storage_api.push(item, push_state, content)
      return item

    # If zipping is not required, just start a push task.
    if not self._use_zip:
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, item.content())
      return

    # If zipping is enabled, zip in a separate thread.
    def zip_and_push():
      # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
      # content right here. It will block until all file is zipped.
      try:
        stream = zip_compress(item.content(), item.compression_level)
        data = ''.join(stream)
      except Exception as exc:
        logging.error('Failed to zip \'%s\': %s', item, exc)
        # Propagate the failure to whoever is pulling from |channel|.
        channel.send_exception()
        return
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, [data])
    self.cpu_thread_pool.add_task(priority, zip_and_push)

  def push(self, item, push_state):
    """Synchronously pushes a single item to the server.

    If you need to push many items at once, consider using 'upload_items' or
    'async_push' with instance of TaskChannel.

    Arguments:
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      Pushed item (same object as |item|).
    """
    channel = threading_utils.TaskChannel()
    with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT):
      self.async_push(channel, item, push_state)
      # Block until the upload thread reports completion back on the channel.
      pushed = channel.pull()
      assert pushed is item
    return item

  def async_fetch(self, channel, priority, digest, size, sink):
    """Starts asynchronous fetch from the server in a parallel thread.

    Arguments:
      channel: TaskChannel that receives back |digest| when download ends.
      priority: thread pool task priority for the fetch.
      digest: hex digest of an item to download.
      size: expected size of the item (after decompression).
      sink: function that will be called as sink(generator).
    """
    def fetch():
      try:
        # Prepare reading pipeline.
        stream = self._storage_api.fetch(digest)
        if self._use_zip:
          stream = zip_decompress(stream, DISK_FILE_CHUNK)
        # Run |stream| through verifier that will assert its size.
        verifier = FetchStreamVerifier(stream, size)
        # Verified stream goes to |sink|.
        sink(verifier.run())
      except Exception as err:
        logging.error('Failed to fetch %s: %s', digest, err)
        raise
      return digest

    # Don't bother with zip_thread_pool for decompression. Decompression is
    # really fast and most probably IO bound anyway.
    self.net_thread_pool.add_task_with_channel(channel, priority, fetch)

  def get_missing_items(self, items):
    """Yields items that are missing from the server.

    Issues multiple parallel queries via StorageApi's 'contains' method.

    Arguments:
      items: a list of Item objects to check.

    Yields:
      For each missing item it yields a pair (item, push_state), where:
        * item - Item object that is missing (one of |items|).
        * push_state - opaque object that contains storage specific information
            describing how to upload the item (for example in case of cloud
            storage, it is signed upload URLs). It can later be passed to
            'async_push'.
    """
    channel = threading_utils.TaskChannel()
    pending = 0

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # Enqueue all requests.
    for batch in batch_items_for_check(items):
      self.net_thread_pool.add_task_with_channel(channel, WorkerPool.HIGH,
          self._storage_api.contains, batch)
      pending += 1

    # Yield results as they come in.
    for _ in xrange(pending):
      # Each pulled result is a dict of {missing Item: push_state}.
      for missing_item, push_state in channel.pull().iteritems():
        yield missing_item, push_state
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000683
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000684
def batch_items_for_check(items, batch_size_limits=None):
  """Splits list of items to check for existence on the server into batches.

  Each batch corresponds to a single 'exists?' query to the server via a call
  to StorageApi's 'contains' method.

  Items are visited sorted by size, largest first. The i-th batch contains
  batch_size_limits[i] items; once the schedule is exhausted, the last entry
  is reused for all subsequent batches.

  Arguments:
    items: a list of Item objects.
    batch_size_limits: optional sequence of per-batch size limits; defaults to
        the module-level ITEMS_PER_CONTAINS_QUERIES schedule. Passing it
        explicitly makes the function independent of module configuration
        (useful for testing and reuse).

  Yields:
    Batches of items to query for existence in a single operation,
    each batch is a list of Item objects.
  """
  # Resolve the default lazily so the module constant is only required when
  # the caller does not supply an explicit schedule.
  if batch_size_limits is None:
    batch_size_limits = ITEMS_PER_CONTAINS_QUERIES
  batch_count = 0
  batch_size_limit = batch_size_limits[0]
  next_queries = []
  for item in sorted(items, key=lambda x: x.size, reverse=True):
    next_queries.append(item)
    if len(next_queries) == batch_size_limit:
      yield next_queries
      next_queries = []
      batch_count += 1
      # Clamp the index so the last configured limit repeats forever.
      batch_size_limit = batch_size_limits[
          min(batch_count, len(batch_size_limits) - 1)]
  # Flush the final partial batch, if any.
  if next_queries:
    yield next_queries
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000711
712
class FetchQueue(object):
  """Fetches items from Storage and places them into LocalCache.

  It manages multiple concurrent fetch operations. Acts as a bridge between
  Storage and LocalCache so that Storage and LocalCache don't depend on each
  other at all.
  """

  def __init__(self, storage, cache):
    self.storage = storage
    self.cache = cache
    # Completed downloads are reported back on this channel.
    self._channel = threading_utils.TaskChannel()
    # Digests with a download currently in flight.
    self._pending = set()
    # Every digest ever requested through 'add'; checked by verify_all_cached.
    self._accessed = set()
    # Digests known to be present in the cache.
    self._fetched = cache.cached_set()

  def add(self, digest, size=UNKNOWN_FILE_SIZE, priority=WorkerPool.MED):
    """Starts asynchronous fetch of item |digest|."""
    # A fetch for this digest is already in flight: nothing to do.
    if digest in self._pending:
      return

    # Record the access so verify_all_cached can later confirm the item
    # is still present in the cache.
    self._accessed.add(digest)

    # Already in the cache? Refresh its LRU position instead of downloading.
    if digest in self._fetched:
      # 'touch' returns True if the item is in cache and not corrupted.
      if self.cache.touch(digest, size):
        return
      # Corrupted entry: drop it and fall through to a fresh download.
      self._fetched.remove(digest)
      self.cache.evict(digest)

    # TODO(maruel): It should look at the free disk space, the current cache
    # size and the size of the new item on every new item:
    # - Trim the cache as more entries are listed when free disk space is low,
    #   otherwise if the amount of data downloaded during the run > free disk
    #   space, it'll crash.
    # - Make sure there's enough free disk space to fit all dependencies of
    #   this run! If not, abort early.

    # Kick off the actual download; completion arrives on self._channel.
    self._pending.add(digest)
    self.storage.async_fetch(
        self._channel, priority, digest, size,
        functools.partial(self.cache.write, digest))

  def wait(self, digests):
    """Starts a loop that waits for at least one of |digests| to be retrieved.

    Returns the first digest retrieved.
    """
    # Maybe one of the requested digests is already done.
    for candidate in digests:
      if candidate in self._fetched:
        return candidate

    # Every requested digest must have been scheduled via 'add' already.
    assert all(digest in self._pending for digest in digests), (
        digests, self._pending)

    # Pull completions until one of the requested digests shows up.
    while self._pending:
      finished = self._channel.pull()
      self._pending.remove(finished)
      self._fetched.add(finished)
      if finished in digests:
        return finished

    # Unreachable: the assert above guarantees a requested digest is pending.
    raise RuntimeError('Impossible state')

  def inject_local_file(self, path, algo):
    """Adds local file to the cache as if it was fetched from storage."""
    with open(path, 'rb') as src:
      blob = src.read()
    digest = algo(blob).hexdigest()
    self.cache.write(digest, [blob])
    self._fetched.add(digest)
    return digest

  @property
  def pending_count(self):
    """Returns number of items to be fetched."""
    return len(self._pending)

  def verify_all_cached(self):
    """True if all accessed items are in cache."""
    return self._accessed <= self.cache.cached_set()
804
805
class FetchStreamVerifier(object):
  """Verifies that fetched file is valid before passing it to the LocalCache."""

  def __init__(self, stream, expected_size):
    # Iterable of chunks to validate and forward.
    self.stream = stream
    # Expected total byte count, or UNKNOWN_FILE_SIZE to skip the size check.
    self.expected_size = expected_size
    # Bytes seen so far.
    self.current_size = 0

  def run(self):
    """Generator that yields same items as |stream|.

    Verifies |stream| is complete before yielding a last chunk to consumer.

    Also wraps IOError produced by consumer into MappingError exceptions since
    otherwise Storage will retry fetch on unrelated local cache errors.
    """
    # Buffer exactly one chunk, so that by the time a chunk is handed to the
    # consumer we already know whether it is the last one and can validate
    # total size first.
    held = None
    for incoming in self.stream:
      assert incoming is not None
      if held is not None:
        self._inspect_chunk(held, is_last=False)
        try:
          yield held
        except IOError as exc:
          raise MappingError('Failed to store an item in cache: %s' % exc)
      held = incoming
    # Flush the buffered final chunk (if the stream produced anything).
    if held is not None:
      self._inspect_chunk(held, is_last=True)
      try:
        yield held
      except IOError as exc:
        raise MappingError('Failed to store an item in cache: %s' % exc)

  def _inspect_chunk(self, chunk, is_last):
    """Called for each fetched chunk before passing it to consumer."""
    self.current_size += len(chunk)
    # Only the final chunk triggers the size check, and only when the
    # expected size is actually known.
    if not is_last or self.expected_size == UNKNOWN_FILE_SIZE:
      return
    if self.expected_size != self.current_size:
      raise IOError('Incorrect file size: expected %d, got %d' % (
          self.expected_size, self.current_size))
849
850
class StorageApi(object):
  """Interface for classes that implement low-level storage operations.

  Implementations deal only with raw digests and byte streams; compression
  and hashing schemes are layered on top by the higher level Storage class.

  Clients should generally not use StorageApi directly. Storage class is
  preferred since it implements compression and upload optimizations.
  """

  @property
  def location(self):
    """Location of a backing store that this class is using.

    Exact meaning depends on the implementation: for IsolateServer it is an
    URL of the isolate server, for FileSystem it is a file system path.
    """
    raise NotImplementedError()

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines the hashing scheme and compression method used.
    """
    raise NotImplementedError()

  def get_fetch_url(self, digest):
    """Returns an URL that can be used to fetch an item with given digest.

    Arguments:
      digest: hex digest of item to fetch.

    Returns:
      An URL, or None if the protocol doesn't support direct URLs.
    """
    raise NotImplementedError()

  def fetch(self, digest, offset=0):
    """Fetches an object and yields its content.

    Arguments:
      digest: hash digest of item to download.
      offset: offset (in bytes) from the start of the file to resume fetch at.

    Yields:
      Chunks of downloaded item (as str objects).
    """
    raise NotImplementedError()

  def push(self, item, push_state, content=None):
    """Uploads an |item| with content generated by |content| generator.

    |item| MUST go through 'contains' call to get |push_state| before it can
    be pushed to the storage.

    To be clear, here is one possible usage:
      all_items = [... all items to push as Item subclasses ...]
      for missing_item, push_state in storage_api.contains(all_items).items():
        storage_api.push(missing_item, push_state)

    When pushing to a namespace with compression, data that should be pushed
    and data provided by the item is not the same. In that case |content| is
    not None and it yields chunks of compressed data (using item.content() as
    a source of original uncompressed data). This is implemented by Storage
    class.

    Arguments:
      item: Item object that holds information about an item being pushed.
      push_state: push state object as returned by 'contains' call.
      content: a generator that yields chunks to push, item.content() if None.

    Returns:
      None.
    """
    raise NotImplementedError()

  def contains(self, items):
    """Checks for |items| on the server, prepares missing ones for upload.

    Arguments:
      items: list of Item objects to check for presence.

    Returns:
      A dict missing Item -> opaque push state object to be passed to 'push'.
      See doc string for 'push'.
    """
    raise NotImplementedError()
939
940
class _IsolateServerPushState(object):
  """Per-item state passed from IsolateServer.contains to IsolateServer.push.

  Note this needs to be a global class to support pickling.
  """

  def __init__(self, upload_url, finalize_url):
    # Progress flags flipped by IsolateServer.push as the two-step upload
    # (PUT content, then POST finalize) proceeds; they make 'push' retryable.
    self.uploaded = False
    self.finalized = False
    # URL to PUT the item's content to.
    self.upload_url = upload_url
    # URL to POST the finalization request to; a falsy value skips the
    # finalization step in IsolateServer.push.
    self.finalize_url = finalize_url
952
953
class IsolateServer(StorageApi):
  """StorageApi implementation that downloads and uploads to Isolate Server.

  It uploads and downloads directly from Google Storage whenever appropriate.
  Works only within single namespace.
  """

  def __init__(self, base_url, namespace):
    super(IsolateServer, self).__init__()
    assert base_url.startswith('http'), base_url
    # Normalized server URL (no trailing slash) and namespace this instance
    # is bound to.
    self._base_url = base_url.rstrip('/')
    self._namespace = namespace
    # Protects lazy initialization of |_server_caps| in _server_capabilities.
    self._lock = threading.Lock()
    # Cached /handshake response; None until the first handshake completes.
    self._server_caps = None

  @staticmethod
  def _generate_handshake_request():
    """Returns a dict to be sent as handshake request body."""
    # TODO(vadimsh): Set 'pusher' and 'fetcher' according to intended usage.
    return {
        'client_app_version': __version__,
        'fetcher': True,
        'protocol_version': ISOLATE_PROTOCOL_VERSION,
        'pusher': True,
    }

  @staticmethod
  def _validate_handshake_response(caps):
    """Validates and normalizes handshake response."""
    logging.info('Protocol version: %s', caps['protocol_version'])
    logging.info('Server version: %s', caps['server_app_version'])
    if caps.get('error'):
      raise MappingError(caps['error'])
    if not caps['access_token']:
      raise ValueError('access_token is missing')
    return caps

  @property
  def _server_capabilities(self):
    """Performs handshake with the server if not yet done.

    The handshake is done at most once; the response (including the access
    token used by 'contains') is cached in |_server_caps|.

    Returns:
      Server capabilities dictionary as returned by /handshake endpoint.

    Raises:
      MappingError if server rejects the handshake.
    """
    # TODO(maruel): Make this request much earlier asynchronously while the
    # files are being enumerated.

    # TODO(vadimsh): Put |namespace| in the URL so that server can apply
    # namespace-level ACLs to this call.
    with self._lock:
      if self._server_caps is None:
        try:
          caps = net.url_read_json(
              url=self._base_url + '/content-gs/handshake',
              data=self._generate_handshake_request())
          if caps is None:
            raise MappingError('Failed to perform handshake.')
          if not isinstance(caps, dict):
            raise ValueError('Expecting JSON dict')
          self._server_caps = self._validate_handshake_response(caps)
        except (ValueError, KeyError, TypeError) as exc:
          # KeyError exception has very confusing str conversion: it's just a
          # missing key value and nothing else. So print exception class name
          # as well.
          raise MappingError('Invalid handshake response (%s): %s' % (
              exc.__class__.__name__, exc))
      return self._server_caps

  @property
  def location(self):
    """See StorageApi.location: URL of the isolate server."""
    return self._base_url

  @property
  def namespace(self):
    """See StorageApi.namespace."""
    return self._namespace

  def get_fetch_url(self, digest):
    """See StorageApi.get_fetch_url: direct retrieve URL for |digest|."""
    assert isinstance(digest, basestring)
    return '%s/content-gs/retrieve/%s/%s' % (
        self._base_url, self._namespace, digest)

  def fetch(self, digest, offset=0):
    """See StorageApi.fetch.

    When |offset| is non-zero, sends a Range request and strictly validates
    the Content-Range response header before trusting the stream.
    """
    source_url = self.get_fetch_url(digest)
    logging.debug('download_file(%s, %d)', source_url, offset)

    connection = net.url_open(
        source_url,
        read_timeout=DOWNLOAD_READ_TIMEOUT,
        headers={'Range': 'bytes=%d-' % offset} if offset else None)

    if not connection:
      raise IOError('Request failed - %s' % source_url)

    # If |offset| is used, verify server respects it by checking Content-Range.
    if offset:
      content_range = connection.get_header('Content-Range')
      if not content_range:
        raise IOError('Missing Content-Range header')

      # 'Content-Range' format is 'bytes <offset>-<last_byte_index>/<size>'.
      # According to a spec, <size> can be '*' meaning "Total size of the file
      # is not known in advance".
      try:
        match = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', content_range)
        if not match:
          raise ValueError()
        content_offset = int(match.group(1))
        last_byte_index = int(match.group(2))
        size = None if match.group(3) == '*' else int(match.group(3))
      except ValueError:
        raise IOError('Invalid Content-Range header: %s' % content_range)

      # Ensure returned offset equals requested one.
      if offset != content_offset:
        raise IOError('Expecting offset %d, got %d (Content-Range is %s)' % (
            offset, content_offset, content_range))

      # Ensure entire tail of the file is returned.
      if size is not None and last_byte_index + 1 != size:
        raise IOError('Incomplete response. Content-Range: %s' % content_range)

    return stream_read(connection, NET_IO_FILE_CHUNK)

  def push(self, item, push_state, content=None):
    """See StorageApi.push.

    Two-step upload: PUT the content to |push_state.upload_url|, then
    (optionally) POST to |push_state.finalize_url|. |push_state| flags record
    progress so a retry after a failed finalization skips the re-upload.
    """
    assert isinstance(item, Item)
    assert item.digest is not None
    assert item.size is not None
    assert isinstance(push_state, _IsolateServerPushState)
    assert not push_state.finalized

    # Default to item.content().
    content = item.content() if content is None else content

    # Do not iterate byte by byte over 'str'. Push it all as a single chunk.
    if isinstance(content, basestring):
      assert not isinstance(content, unicode), 'Unicode string is not allowed'
      content = [content]

    # TODO(vadimsh): Do not read from |content| generator when retrying push.
    # If |content| is indeed a generator, it can not be re-winded back
    # to the beginning of the stream. A retry will find it exhausted. A possible
    # solution is to wrap |content| generator with some sort of caching
    # restartable generator. It should be done alongside streaming support
    # implementation.

    # This push operation may be a retry after failed finalization call below,
    # no need to reupload contents in that case.
    if not push_state.uploaded:
      # A cheezy way to avoid memcpy of (possibly huge) file, until streaming
      # upload support is implemented.
      if isinstance(content, list) and len(content) == 1:
        content = content[0]
      else:
        content = ''.join(content)
      # PUT file to |upload_url|.
      response = net.url_read(
          url=push_state.upload_url,
          data=content,
          content_type='application/octet-stream',
          method='PUT')
      if response is None:
        raise IOError('Failed to upload a file %s to %s' % (
            item.digest, push_state.upload_url))
      push_state.uploaded = True
    else:
      logging.info(
          'A file %s already uploaded, retrying finalization only', item.digest)

    # Optionally notify the server that it's done.
    if push_state.finalize_url:
      # TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
      # send it to isolated server. That way isolate server can verify that
      # the data safely reached Google Storage (GS provides MD5 and CRC32C of
      # stored files).
      # TODO(maruel): Fix the server to accept propery data={} so
      # url_read_json() can be used.
      response = net.url_read(
          url=push_state.finalize_url,
          data='',
          content_type='application/json',
          method='POST')
      if response is None:
        raise IOError('Failed to finalize an upload of %s' % item.digest)
      push_state.finalized = True

  def contains(self, items):
    """See StorageApi.contains.

    Sends a single /pre-upload batch query; the response is validated to be a
    list parallel to |items| (null entry == already present on the server).
    """
    logging.info('Checking existence of %d files...', len(items))

    # Ensure all items were initialized with 'prepare' call. Storage does that.
    assert all(i.digest is not None and i.size is not None for i in items)

    # Request body is a json encoded list of dicts.
    body = [
        {
          'h': item.digest,
          's': item.size,
          'i': int(item.high_priority),
        } for item in items
    ]

    query_url = '%s/content-gs/pre-upload/%s?token=%s' % (
        self._base_url,
        self._namespace,
        urllib.quote(self._server_capabilities['access_token']))

    # Response body is a list of push_urls (or null if file is already present).
    response = None
    try:
      response = net.url_read_json(url=query_url, data=body)
      if response is None:
        raise MappingError('Failed to execute /pre-upload query')
      if not isinstance(response, list):
        raise ValueError('Expecting response with json-encoded list')
      if len(response) != len(items):
        raise ValueError(
            'Incorrect number of items in the list, expected %d, '
            'but got %d' % (len(items), len(response)))
    except ValueError as err:
      raise MappingError(
          'Invalid response from server: %s, body is %s' % (err, response))

    # Pick Items that are missing, attach _PushState to them.
    missing_items = {}
    for i, push_urls in enumerate(response):
      if push_urls:
        assert len(push_urls) == 2, str(push_urls)
        missing_items[items[i]] = _IsolateServerPushState(
            push_urls[0], push_urls[1])
    logging.info('Queried %d files, %d cache hit',
        len(items), len(items) - len(missing_items))
    return missing_items
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001188
1189
class FileSystem(StorageApi):
  """StorageApi implementation that fetches data from the file system.

  The common use case is a NFS/CIFS file server that is mounted locally that is
  used to fetch the file on a local partition.
  """

  # Sentinel handed out by 'contains' and demanded back by 'push'; it forces
  # callers to go through 'contains' first (naively passing None to 'push'
  # will trip the assert there).
  _DUMMY_PUSH_STATE = object()

  def __init__(self, base_path, namespace):
    super(FileSystem, self).__init__()
    self._base_path = base_path
    self._namespace = namespace

  @property
  def location(self):
    """See StorageApi.location: root directory of the store."""
    return self._base_path

  @property
  def namespace(self):
    """See StorageApi.namespace."""
    return self._namespace

  def get_fetch_url(self, digest):
    """Plain file system has no URL scheme, so there is nothing to return."""
    return None

  def fetch(self, digest, offset=0):
    """See StorageApi.fetch: streams the file named |digest|."""
    assert isinstance(digest, basestring)
    path = os.path.join(self._base_path, digest)
    return file_read(path, offset=offset)

  def push(self, item, push_state, content=None):
    """See StorageApi.push: writes the item's content to a file."""
    assert isinstance(item, Item)
    assert item.digest is not None
    assert item.size is not None
    assert push_state is self._DUMMY_PUSH_STATE
    if content is None:
      content = item.content()
    if isinstance(content, basestring):
      assert not isinstance(content, unicode), 'Unicode string is not allowed'
      # Write the string as one chunk instead of byte by byte.
      content = [content]
    file_write(os.path.join(self._base_path, item.digest), content)

  def contains(self, items):
    """See StorageApi.contains: an item is present iff its file exists."""
    assert all(i.digest is not None and i.size is not None for i in items)
    missing = {}
    for item in items:
      if not os.path.exists(os.path.join(self._base_path, item.digest)):
        missing[item] = self._DUMMY_PUSH_STATE
    return missing
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001238
1239
class LocalCache(object):
  """Local cache that stores objects fetched via Storage.

  It can be accessed concurrently from multiple threads, so it should protect
  its internal state with some lock.
  """
  # Directory backing the cache, if the implementation uses one.
  cache_dir = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface."""
    return False

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Removes item from cache if it's there."""
    raise NotImplementedError()

  def read(self, digest):
    """Returns contents of the cached item as a single str."""
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache."""
    raise NotImplementedError()

  def hardlink(self, digest, dest, file_mode):
    """Ensures file at |dest| has same content as cached |digest|.

    If file_mode is provided, it is used to set the executable bit if
    applicable.
    """
    raise NotImplementedError()
1291
1292
1293class MemoryCache(LocalCache):
1294 """LocalCache implementation that stores everything in memory."""
1295
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001296 def __init__(self, file_mode_mask=0500):
1297 """Args:
1298 file_mode_mask: bit mask to AND file mode with. Default value will make
1299 all mapped files to be read only.
1300 """
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001301 super(MemoryCache, self).__init__()
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001302 self._file_mode_mask = file_mode_mask
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001303 # Let's not assume dict is thread safe.
1304 self._lock = threading.Lock()
1305 self._contents = {}
1306
1307 def cached_set(self):
1308 with self._lock:
1309 return set(self._contents)
1310
1311 def touch(self, digest, size):
1312 with self._lock:
1313 return digest in self._contents
1314
1315 def evict(self, digest):
1316 with self._lock:
1317 self._contents.pop(digest, None)
1318
1319 def read(self, digest):
1320 with self._lock:
1321 return self._contents[digest]
1322
1323 def write(self, digest, content):
1324 # Assemble whole stream before taking the lock.
1325 data = ''.join(content)
1326 with self._lock:
1327 self._contents[digest] = data
1328
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001329 def hardlink(self, digest, dest, file_mode):
1330 """Since data is kept in memory, there is no filenode to hardlink."""
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001331 file_write(dest, [self.read(digest)])
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001332 if file_mode is not None:
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001333 os.chmod(dest, file_mode & self._file_mode_mask)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001334
1335
def get_hash_algo(_namespace):
  """Returns the hash constructor to use when uploading to given |namespace|.

  Currently always SHA-1, regardless of the namespace.
  """
  # TODO(vadimsh): Implement this at some point.
  return hashlib.sha1
1340
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001341
def is_namespace_with_compression(namespace):
  """Returns True if given |namespace| stores compressed objects.

  Compression is signaled by a '-gzip' or '-deflate' namespace suffix.
  """
  return (namespace.endswith('-gzip') or
          namespace.endswith('-deflate'))
1345
1346
def get_storage_api(file_or_url, namespace):
  """Returns an object implementing the low-level StorageApi interface.

  Used by Storage to work with a single isolate |namespace|. Clients should
  rarely use it directly; 'get_storage' is the better alternative.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of
        isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of StorageApi subclass.
  """
  if not file_path.is_url(file_or_url):
    return FileSystem(file_or_url, namespace)
  return IsolateServer(file_or_url, namespace)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001368
1369
def get_storage(file_or_url, namespace):
  """Returns Storage class that can upload and download from |namespace|.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of
        isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of Storage.
  """
  storage_api = get_storage_api(file_or_url, namespace)
  return Storage(storage_api)
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001384
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001385
def expand_symlinks(indir, relfile):
  """Follows symlinks in |relfile|, but treating symlinks that point outside the
  build tree as if they were ordinary directories/files. Returns the final
  symlink-free target and a list of paths to symlinks encountered in the
  process.

  The rule about symlinks outside the build tree is for the benefit of the
  Chromium OS ebuild, which symlinks the output directory to an unrelated path
  in the chroot.

  Fails when a directory loop is detected, although in theory we could support
  that case.

  Arguments:
    indir: root directory of the tree; returned paths stay relative to it.
    relfile: path relative to |indir|; a trailing os.path.sep means it
        designates a directory.

  Returns:
    Tuple (relfile, symlinks) where |relfile| is the symlink-free path
    relative to |indir| (trailing separator preserved) and |symlinks| is the
    list of in-tree symlink paths (relative to |indir|) traversed on the way.

  Raises:
    MappingError: if a symlink target does not exist or a recursive symlink
        reference is detected.
  """
  # Remember if the input designated a directory so the trailing separator
  # can be restored on the result at the end.
  is_directory = relfile.endswith(os.path.sep)
  done = indir
  todo = relfile.strip(os.path.sep)
  symlinks = []

  while todo:
    # Locate the first symlink component of |todo| relative to |done|.
    pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
        done, todo)
    if not symlink:
      # No symlink remains in the path; fix the case and stop.
      todo = file_path.fix_native_path_case(done, todo)
      done = os.path.join(done, todo)
      break
    symlink_path = os.path.join(done, pre_symlink, symlink)
    post_symlink = post_symlink.lstrip(os.path.sep)
    # readlink doesn't exist on Windows.
    # pylint: disable=E1101
    target = os.path.normpath(os.path.join(done, pre_symlink))
    symlink_target = os.readlink(symlink_path)
    if os.path.isabs(symlink_target):
      # Absolute path are considered a normal directories. The use case is
      # generally someone who puts the output directory on a separate drive.
      target = symlink_target
    else:
      # The symlink itself could be using the wrong path case.
      target = file_path.fix_native_path_case(target, symlink_target)

    if not os.path.exists(target):
      raise MappingError(
          'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
    target = file_path.get_native_path_case(target)
    if not file_path.path_starts_with(indir, target):
      # The symlink points outside the build tree; treat it as an ordinary
      # directory/file and keep walking through it.
      done = symlink_path
      todo = post_symlink
      continue
    if file_path.path_starts_with(target, symlink_path):
      raise MappingError(
          'Can\'t map recursive symlink reference %s -> %s' %
          (symlink_path, target))
    logging.info('Found symlink: %s -> %s', symlink_path, target)
    symlinks.append(os.path.relpath(symlink_path, indir))
    # Treat the common prefix of the old and new paths as done, and start
    # scanning again.
    target = target.split(os.path.sep)
    symlink_path = symlink_path.split(os.path.sep)
    prefix_length = 0
    for target_piece, symlink_path_piece in zip(target, symlink_path):
      if target_piece == symlink_path_piece:
        prefix_length += 1
      else:
        break
    done = os.path.sep.join(target[:prefix_length])
    todo = os.path.join(
        os.path.sep.join(target[prefix_length:]), post_symlink)

  relfile = os.path.relpath(done, indir)
  # Restore the trailing separator if the input designated a directory.
  relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
  return relfile, symlinks
1456
1457
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: root directory all outputs are relative to.
    relfile: path relative to |indir|; a trailing os.path.sep means it is a
        directory to expand recursively.
    blacklist: optional callable taking a relative path and returning True
        for entries to skip while recursing into directories. Files passed in
        directly are returned even if blacklisted.
    follow_symlinks: if True, resolves in-tree symlinks via expand_symlinks()
        and includes them in the output.

  Returns:
    List of paths relative to |indir|, including any traversed symlinks.

  Raises:
    MappingError: on absolute, out-of-tree, missing or miscased paths.
  """
  if os.path.isabs(relfile):
    raise MappingError('Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + '.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      #    u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      #  ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree. We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    relfile, symlinks = expand_symlinks(indir, relfile)

  if relfile.endswith(os.path.sep):
    if not os.path.isdir(infile):
      raise MappingError(
          '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith('.' + os.path.sep):
      relfile = relfile[2:]
    outfiles = symlinks
    try:
      # Recurse into the directory; blacklisted children are skipped, and
      # child directories get a trailing separator so the recursion expands
      # them too.
      for filename in os.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if os.path.isdir(os.path.join(indir, inner_relfile)):
          inner_relfile += os.path.sep
        outfiles.extend(
            expand_directory_and_symlink(indir, inner_relfile, blacklist,
                                         follow_symlinks))
      return outfiles
    except OSError as e:
      raise MappingError(
          'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise MappingError(
          'Input directory %s must have a trailing slash' % infile)

    if not os.path.isfile(infile):
      raise MappingError('Input file %s doesn\'t exist' % infile)

    return symlinks + [relfile]
1536
1537
def process_input(filepath, prevdict, read_only, algo):
  """Processes an input file, a dependency, and return meta data about it.

  Behaviors:
  - Retrieves the file mode, file size, file timestamp, file link
    destination if it is a file link and calculate the SHA-1 of the file's
    content if the path points to a file and not a symlink.

  Arguments:
    filepath: File to act on.
    prevdict: the previous dictionary. It is used to retrieve the cached sha-1
              to skip recalculating the hash. Optional.
    read_only: If 1 or 2, the file mode is manipulated. In practice, only save
               one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
               windows, mode is not set since all files are 'executable' by
               default.
    algo: Hashing algorithm used.

  Returns:
    The necessary data to create an entry in the 'files' section of an
    .isolated file. Keys used: 'm' (mode, non-Windows only), 't' (mtime),
    's' (size) and 'h' (hash) for regular files; 'l' (destination) for
    symlinks.

  Raises:
    MappingError: if |filepath| is missing.
  """
  out = {}
  # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
  # if prevdict.get('T') == True:
  #   # The file's content is ignored. Skip the time and hard code mode.
  #   out['s'] = 0
  #   out['h'] = algo().hexdigest()
  #   out['T'] = True
  #   return out

  # Always check the file stat and check if it is a link. The timestamp is used
  # to know if the file's content/symlink destination should be looked into.
  # E.g. only reuse from prevdict if the timestamp hasn't changed.
  # There is the risk of the file's timestamp being reset to its last value
  # manually while its content changed. We don't protect against that use case.
  try:
    # lstat() so symlinks themselves are stat'ed, not their destination.
    filestats = os.lstat(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)
  is_link = stat.S_ISLNK(filestats.st_mode)

  if sys.platform != 'win32':
    # Ignore file mode on Windows since it's not really useful there.
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    if read_only:
      filemode &= ~stat.S_IWUSR
    # Keep the group execute bit in sync with the user execute bit.
    if filemode & stat.S_IXUSR:
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    if not is_link:
      out['m'] = filemode

  # Used to skip recalculating the hash or link destination. Use the most recent
  # update time.
  # TODO(maruel): Save it in the .state file instead of .isolated so the
  # .isolated file is deterministic.
  out['t'] = int(round(filestats.st_mtime))

  if not is_link:
    out['s'] = filestats.st_size
    # If the timestamp wasn't updated and the file size is still the same, carry
    # on the sha-1.
    if (prevdict.get('t') == out['t'] and
        prevdict.get('s') == out['s']):
      # Reuse the previous hash if available.
      out['h'] = prevdict.get('h')
    if not out.get('h'):
      out['h'] = hash_file(filepath, algo)
  else:
    # If the timestamp wasn't updated, carry on the link destination.
    if prevdict.get('t') == out['t']:
      # Reuse the previous link destination if available.
      out['l'] = prevdict.get('l')
    if out.get('l') is None:
      # The link could be in an incorrect path case. In practice, this only
      # happen on OSX on case insensitive HFS.
      # TODO(maruel): It'd be better if it was only done once, in
      # expand_directory_and_symlink(), so it would not be necessary to do again
      # here.
      symlink_value = os.readlink(filepath)  # pylint: disable=E1101
      filedir = file_path.get_native_path_case(os.path.dirname(filepath))
      native_dest = file_path.fix_native_path_case(filedir, symlink_value)
      out['l'] = os.path.relpath(native_dest, filedir)
  return out
1627
1628
def save_isolated(isolated, data):
  """Writes one or multiple .isolated files.

  Note: this reference implementation does not create child .isolated file so
  it always returns an empty list.

  Returns the list of child isolated files that are included by |isolated|.
  """
  # Round-trip |data| through the parser to make sure it is valid .isolated
  # data before writing it out.
  load_isolated(json.dumps(data), SUPPORTED_ALGOS[data['algo']])
  tools.write_json(isolated, data, True)
  return []
1642
1643
def upload_tree(base_url, indir, infiles, namespace):
  """Uploads the given tree to the given url.

  Arguments:
    base_url: The base url, it is assume that |base_url|/has/ can be used to
              query if an element was already uploaded, and |base_url|/store/
              can be used to upload a new element.
    indir: Root directory the infiles are based in.
    infiles: dict of files to upload from |indir| to |base_url|.
    namespace: The namespace to use on the server.
  """
  logging.info('upload_tree(indir=%s, files=%d)', indir, len(infiles))

  # Convert |indir| + |infiles| into FileItem objects. Symlinks are skipped
  # since they are not represented by items on the isolate server side.
  items = []
  for filepath, metadata in infiles.iteritems():
    if 'l' in metadata:
      continue
    items.append(
        FileItem(
            path=os.path.join(indir, filepath),
            digest=metadata['h'],
            size=metadata['s'],
            high_priority=metadata.get('priority') == '0'))

  with get_storage(base_url, namespace) as storage:
    storage.upload_items(items)
  return 0
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001673
1674
def load_isolated(content, algo):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  Arguments:
  - content: raw serialized content to load.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
          algorithm used on the Isolate Server. If None, falls back to the
          'algo' key embedded in the data, defaulting to 'sha-1'.

  Returns:
    The validated .isolated data as a dict, with path separators in the
    'files' keys and 'l' values normalized to os.path.sep.

  Raises:
    ConfigError: if the content is not valid .isolated data.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  value = data.get('version', '1.0')
  if not isinstance(value, basestring):
    raise ConfigError('Expected string, got %r' % value)
  try:
    version = tuple(map(int, value.split('.')))
  except ValueError:
    raise ConfigError('Expected valid version, got %r' % value)

  expected_version = tuple(map(int, ISOLATED_FILE_VERSION.split('.')))
  # Major version must match.
  if version[0] != expected_version[0]:
    raise ConfigError(
        'Expected compatible \'%s\' version, got %r' %
        (ISOLATED_FILE_VERSION, value))

  if algo is None:
    # TODO(maruel): Remove the default around Jan 2014.
    # Default the algorithm used in the .isolated file itself, falls back to
    # 'sha-1' if unspecified.
    algo = SUPPORTED_ALGOS_REVERSE[data.get('algo', 'sha-1')]

  for key, value in data.iteritems():
    if key == 'algo':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise ConfigError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != SUPPORTED_ALGOS_REVERSE[algo]:
        raise ConfigError(
            'Expected \'%s\', got %r' % (SUPPORTED_ALGOS_REVERSE[algo], value))

    elif key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_valid_hash(subsubvalue, algo):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, (int, long)):
              raise ConfigError('Expected int or long, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        # Cross-key invariants: exactly one of 'h' (file) or 'l' (symlink),
        # 's' present iff 'h' is, and no mode on symlinks.
        if bool('h' in subvalue) == bool('l' in subvalue):
          raise ConfigError(
              'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
              subvalue)
        if bool('h' in subvalue) != bool('s' in subvalue):
          raise ConfigError(
              'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
              subvalue)
        if bool('s' in subvalue) == bool('l' in subvalue):
          raise ConfigError(
              'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
              subvalue)
        if 'l' in subvalue and 'm' in subvalue:
          raise ConfigError(
              'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_valid_hash(subvalue, algo):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'os':
      if version >= (1, 4):
        raise ConfigError('Key \'os\' is not allowed starting version 1.4')

    elif key == 'read_only':
      if value not in (0, 1, 2):
        raise ConfigError('Expected 0, 1 or 2, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise ConfigError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are
  # always in the the native path format, someone could want to download an
  # .isolated tree from another OS.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].iteritems())
    for v in data['files'].itervalues():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data
1818
1819
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""
  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo
    # Becomes True once the whole left-side of the tree is parsed. 'Tree' here
    # means the .isolate and all the .isolated files recursively included by
    # it with the 'includes' key. The order of each sha-1 in 'includes', each
    # representing a .isolated file in the hash table, is important, as the
    # later ones are not processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Raw data.
    self.data = {}
    # One IsolatedFile instance per entry of self.data['includes'].
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, self.algo)
    included = self.data.get('includes', [])
    self.children = [IsolatedFile(digest, self.algo) for digest in included]
    self._is_parsed = True

  def fetch_files(self, fetch_queue, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if not self._is_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        fetch_queue.add(properties['h'], properties['s'], WorkerPool.MED)
    self.files_fetched = True
1877
1878
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command to run, as a list of arguments, if any.
    self.command = []
    # Maps relative file path -> properties dict as found in 'files' sections.
    self.files = {}
    # Value of the 'read_only' key, or None if unspecified.
    self.read_only = None
    # Directory to run the command from, relative to the mapped root.
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, fetch_queue, root_isolated_hash, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.
    """
    self.root = IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      # Enqueues |isolated_file| for fetching, rejecting recursive includes.
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      fetch_queue.add(h, priority=WorkerPool.HIGH)

    retrieve(self.root)

    while pending:
      # Process whichever .isolated file finished fetching first.
      item_hash = fetch_queue.wait(pending)
      item = pending.pop(item_hash)
      item.load(fetch_queue.cache.read(item_hash))
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(fetch_queue, self.root)

    def check(n):
      # Sanity check: every node in the tree must have its files fetched.
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''

  def _traverse_tree(self, fetch_queue, node):
    """Fetches files of fetchable nodes and unlocks at most one new child.

    Only the first non-fetchable child per node is marked fetchable on each
    pass, which enforces the strict left-to-right 'includes' ordering.
    """
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(fetch_queue, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(fetch_queue, i)

  def _update_self(self, fetch_queue, node):
    """Merges |node|'s files and properties into self; earliest node wins."""
    node.fetch_files(fetch_queue, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      # Ensure paths are correctly separated on windows.
      self.command = node.data['command']
      if self.command:
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1973
1974
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001975def fetch_isolated(isolated_hash, storage, cache, outdir, require_command):
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001976 """Aggressively downloads the .isolated file(s), then download all the files.
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001977
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001978 Arguments:
1979 isolated_hash: hash of the root *.isolated file.
1980 storage: Storage class that communicates with isolate storage.
1981 cache: LocalCache class that knows how to store and map files locally.
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001982 outdir: Output directory to map file tree to.
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001983 require_command: Ensure *.isolated specifies a command to run.
1984
1985 Returns:
1986 Settings object that holds details about loaded *.isolated file.
1987 """
Marc-Antoine Ruel4e8cd182014-06-18 13:27:17 -04001988 logging.debug(
1989 'fetch_isolated(%s, %s, %s, %s, %s)',
1990 isolated_hash, storage, cache, outdir, require_command)
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001991 # Hash algorithm to use, defined by namespace |storage| is using.
1992 algo = storage.hash_algo
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001993 with cache:
1994 fetch_queue = FetchQueue(storage, cache)
1995 settings = Settings()
1996
1997 with tools.Profiler('GetIsolateds'):
1998 # Optionally support local files by manually adding them to cache.
1999 if not is_valid_hash(isolated_hash, algo):
Marc-Antoine Ruel4e8cd182014-06-18 13:27:17 -04002000 logging.debug('%s is not a valid hash, assuming a file', isolated_hash)
2001 try:
2002 isolated_hash = fetch_queue.inject_local_file(isolated_hash, algo)
2003 except IOError:
2004 raise MappingError(
2005 '%s doesn\'t seem to be a valid file. Did you intent to pass a '
2006 'valid hash?' % isolated_hash)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002007
2008 # Load all *.isolated and start loading rest of the files.
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04002009 settings.load(fetch_queue, isolated_hash, algo)
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002010 if require_command and not settings.command:
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002011 # TODO(vadimsh): All fetch operations are already enqueue and there's no
2012 # easy way to cancel them.
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002013 raise ConfigError('No command to run')
2014
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002015 with tools.Profiler('GetRest'):
2016 # Create file system hierarchy.
2017 if not os.path.isdir(outdir):
2018 os.makedirs(outdir)
2019 create_directories(outdir, settings.files)
Marc-Antoine Ruelccafe0e2013-11-08 16:15:36 -05002020 create_symlinks(outdir, settings.files.iteritems())
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002021
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002022 # Ensure working directory exists.
2023 cwd = os.path.normpath(os.path.join(outdir, settings.relative_cwd))
2024 if not os.path.isdir(cwd):
2025 os.makedirs(cwd)
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002026
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002027 # Multimap: digest -> list of pairs (path, props).
2028 remaining = {}
2029 for filepath, props in settings.files.iteritems():
2030 if 'h' in props:
2031 remaining.setdefault(props['h'], []).append((filepath, props))
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002032
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002033 # Now block on the remaining files to be downloaded and mapped.
2034 logging.info('Retrieving remaining files (%d of them)...',
2035 fetch_queue.pending_count)
2036 last_update = time.time()
2037 with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
2038 while remaining:
2039 detector.ping()
2040
2041 # Wait for any item to finish fetching to cache.
2042 digest = fetch_queue.wait(remaining)
2043
2044 # Link corresponding files to a fetched item in cache.
2045 for filepath, props in remaining.pop(digest):
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05002046 cache.hardlink(
2047 digest, os.path.join(outdir, filepath), props.get('m'))
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002048
2049 # Report progress.
2050 duration = time.time() - last_update
2051 if duration > DELAY_BETWEEN_UPDATES_IN_SECS:
2052 msg = '%d files remaining...' % len(remaining)
2053 print msg
2054 logging.info(msg)
2055 last_update = time.time()
2056
2057 # Cache could evict some items we just tried to fetch, it's a fatal error.
2058 if not fetch_queue.verify_all_cached():
2059 raise MappingError('Cache is too small to hold all requested files')
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002060 return settings
2061
2062
def directory_to_metadata(root, algo, blacklist):
  """Returns the FileItem list and .isolated metadata for a directory."""
  root = file_path.get_native_path_case(root)
  # Symlinks are expanded everywhere but on Windows.
  follow_symlinks = sys.platform != 'win32'
  relpaths = expand_directory_and_symlink(
      root, '.' + os.path.sep, blacklist, follow_symlinks)
  metadata = {}
  for relpath in relpaths:
    meta = process_input(os.path.join(root, relpath), {}, False, algo)
    # The timestamp is not part of the metadata sent to the server.
    meta.pop('t')
    metadata[relpath] = meta
  items = []
  for relpath, meta in metadata.iteritems():
    # Entries without a hash (e.g. symlinks) are not uploaded as content.
    if 'h' not in meta:
      continue
    items.append(
        FileItem(
            path=os.path.join(root, relpath),
            digest=meta['h'],
            size=meta['s'],
            high_priority=relpath.endswith('.isolated')))
  return items, metadata
2083
2084
def archive_files_to_storage(storage, files, blacklist):
  """Stores every entry and returns the relevant data.

  Arguments:
    storage: a Storage object that communicates with the remote object store.
    files: list of file paths to upload. If a directory is specified, a
        .isolated file is created and its hash is returned.
    blacklist: function that returns True if a file should be omitted.

  Returns:
    List of (hash, path) tuples, one per entry in |files|, in the same order.

  Raises:
    Error: on duplicate entries, on a path that is neither a file nor a
        directory, or when a file can't be processed.
  """
  assert all(isinstance(i, unicode) for i in files), files
  if len(files) != len(set(map(os.path.abspath, files))):
    raise Error('Duplicate entries found.')

  results = []
  # The temporary directory is only created as needed.
  tempdir = None
  try:
    # TODO(maruel): Yield the files to a worker thread.
    items_to_upload = []
    for f in files:
      try:
        filepath = os.path.abspath(f)
        if os.path.isdir(filepath):
          # Uploading a whole directory: hash its content, then generate a
          # .isolated file referencing every entry and upload that too.
          items, metadata = directory_to_metadata(
              filepath, storage.hash_algo, blacklist)

          # Create the .isolated file.
          if not tempdir:
            tempdir = tempfile.mkdtemp(prefix='isolateserver')
          handle, isolated = tempfile.mkstemp(dir=tempdir, suffix='.isolated')
          os.close(handle)
          data = {
              'algo': SUPPORTED_ALGOS_REVERSE[storage.hash_algo],
              'files': metadata,
              'version': ISOLATED_FILE_VERSION,
          }
          save_isolated(isolated, data)
          h = hash_file(isolated, storage.hash_algo)
          items_to_upload.extend(items)
          items_to_upload.append(
              FileItem(
                  path=isolated,
                  digest=h,
                  size=os.stat(isolated).st_size,
                  high_priority=True))
          # The directory is reported through its generated .isolated hash.
          results.append((h, f))

        elif os.path.isfile(filepath):
          h = hash_file(filepath, storage.hash_algo)
          items_to_upload.append(
              FileItem(
                  path=filepath,
                  digest=h,
                  size=os.stat(filepath).st_size,
                  high_priority=f.endswith('.isolated')))
          results.append((h, f))
        else:
          raise Error('%s is neither a file nor a directory.' % f)
      except OSError as e:
        # Keep the OS level detail, it helps diagnosing the failure.
        raise Error('Failed to process %s: %s.' % (f, e))
    # Technically we would care about which files were uploaded but we don't
    # much in practice.
    _uploaded_files = storage.upload_items(items_to_upload)
    return results
  finally:
    if tempdir:
      shutil.rmtree(tempdir)
2153
2154
def archive(out, namespace, files, blacklist):
  """Uploads the given entries and prints '<hash> <path>' for each of them."""
  if files == ['-']:
    files = sys.stdin.readlines()

  if not files:
    raise Error('Nothing to upload')

  file_list = [f.decode('utf-8') for f in files]
  compiled_blacklist = tools.gen_blacklist(blacklist)
  with get_storage(out, namespace) as storage:
    entries = archive_files_to_storage(storage, file_list, compiled_blacklist)
    print('\n'.join('%s %s' % entry for entry in entries))
2167
2168
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  add_isolate_server_options(parser, False)
  parser.add_option(
      '--blacklist',
      action='append', default=list(DEFAULT_BLACKLIST),
      help='List of regexp to use as blacklist filter when uploading '
           'directories')
  options, files = parser.parse_args(args)
  process_isolate_server_options(parser, options)
  server = options.isolate_server
  # Only authenticate when actually talking to a server over the network.
  if file_path.is_url(server):
    auth.ensure_logged_in(server)
  try:
    archive(server, options.namespace, files, options.blacklist)
  except Error as e:
    parser.error(e.args[0])
  return 0
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002196
2197
def CMDdownload(parser, args):
  """Download data from the server.

  It can either download individual files or a complete tree from a .isolated
  file.
  """
  add_isolate_server_options(parser, True)
  parser.add_option(
      '-i', '--isolated', metavar='HASH',
      help='hash of an isolated file, .isolated file content is discarded, use '
           '--file if you need it')
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default=os.getcwd(),
      help='destination directory')
  options, args = parser.parse_args(args)
  process_isolate_server_options(parser, options)
  if args:
    parser.error('Unsupported arguments: %s' % args)
  # Exactly one of the two modes must be selected.
  if bool(options.isolated) == bool(options.file):
    parser.error('Use one of --isolated or --file, and only one.')

  options.target = os.path.abspath(options.target)

  remote = options.isolate_server or options.indir
  # Only authenticate when actually talking to a server over the network.
  if file_path.is_url(remote):
    auth.ensure_logged_in(remote)

  with get_storage(remote, options.namespace) as storage:
    # Fetching individual files.
    if options.file:
      channel = threading_utils.TaskChannel()
      pending = {}
      for digest, dest in options.file:
        pending[digest] = dest
        storage.async_fetch(
            channel,
            WorkerPool.MED,
            digest,
            UNKNOWN_FILE_SIZE,
            functools.partial(file_write, os.path.join(options.target, dest)))
      while pending:
        fetched = channel.pull()
        dest = pending.pop(fetched)
        logging.info('%s: %s', fetched, dest)

    # Fetching whole isolated tree.
    if options.isolated:
      settings = fetch_isolated(
          isolated_hash=options.isolated,
          storage=storage,
          cache=MemoryCache(),
          outdir=options.target,
          require_command=False)
      # |rel| is already absolute; the previous code joined options.target onto
      # it a second time, which was only a no-op because joining onto an
      # absolute path discards the first component.
      rel = os.path.join(options.target, settings.relative_cwd)
      print('To run this test please run from the directory %s:' % rel)
      print(' ' + ' '.join(settings.command))

  return 0
2260
2261
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDhashtable(parser, args):
  """Archives data to a hashtable on the file system.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  add_outdir_options(parser)
  blacklist_help = ('List of regexp to use as blacklist filter when uploading '
                    'directories')
  parser.add_option(
      '--blacklist',
      action='append', default=list(DEFAULT_BLACKLIST), help=blacklist_help)
  options, files = parser.parse_args(args)
  process_outdir_options(parser, options, os.getcwd())
  try:
    # The 'default' namespace is used so files are not compressed when
    # archiving to the file system.
    archive(options.outdir, 'default', files, options.blacklist)
  except Error as e:
    parser.error(e.args[0])
  return 0
2288
2289
def add_isolate_server_options(parser, add_indir):
  """Registers --isolate-server and --namespace on |parser|.

  When |add_indir| is True, also registers --indir as a file system based
  alternative to an isolate server.
  """
  parser.add_option(
      '-I', '--isolate-server',
      metavar='URL', default=os.environ.get('ISOLATE_SERVER', ''),
      help='URL of the Isolate Server to use. Defaults to the environment '
           'variable ISOLATE_SERVER if set. No need to specify https://, this '
           'is assumed.')
  parser.add_option(
      '--namespace', default='default-gzip',
      help='The namespace to use on the Isolate Server, default: %default')
  if not add_indir:
    return
  parser.add_option(
      '--indir', metavar='DIR',
      help='Directory used to store the hashtable instead of using an '
          'isolate server.')
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002309
2310
def process_isolate_server_options(parser, options):
  """Processes the --isolate-server and --indir options and aborts if neither is
  specified.

  Mutates options in place: options.isolate_server is rewritten as a
  normalized URL, or options.indir as an absolute native path.
  """
  # --indir only exists on the parser when add_isolate_server_options() was
  # called with add_indir=True, hence the hasattr() check.
  has_indir = hasattr(options, 'indir')
  if not options.isolate_server:
    if not has_indir:
      parser.error('--isolate-server is required.')
    elif not options.indir:
      parser.error('Use one of --indir or --isolate-server.')
  else:
    if has_indir and options.indir:
      parser.error('Use only one of --indir or --isolate-server.')

  if options.isolate_server:
    # 'https' is used as the default scheme when the URL doesn't specify one.
    parts = urlparse.urlparse(options.isolate_server, 'https')
    if parts.query:
      parser.error('--isolate-server doesn\'t support query parameter.')
    if parts.fragment:
      parser.error('--isolate-server doesn\'t support fragment in the url.')
    # urlparse('foo.com') will result in netloc='', path='foo.com', which is not
    # what is desired here.
    new = list(parts)
    if not new[1] and new[2]:
      # Move the host from the path component (index 2) to the netloc
      # component (index 1).
      new[1] = new[2].rstrip('/')
      new[2] = ''
    # Strip any trailing slash from the path.
    new[2] = new[2].rstrip('/')
    options.isolate_server = urlparse.urlunparse(new)
    on_error.report_on_exception_exit(options.isolate_server)
    return

  # --indir mode: normalize to an absolute native path and require it to exist.
  if file_path.is_url(options.indir):
    parser.error('Can\'t use an URL for --indir.')
  options.indir = unicode(options.indir).replace('/', os.path.sep)
  options.indir = os.path.abspath(
      os.path.normpath(os.path.join(os.getcwd(), options.indir)))
  if not os.path.isdir(options.indir):
    parser.error('Path given to --indir must exist.')
2349
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002350
2351
def add_outdir_options(parser):
  """Registers --outdir on |parser|; it is orthogonal to --isolate-server.

  Note: On upload, separate commands are used between 'archive' and 'hashtable'.
  On 'download', the same command can download from either an isolate server or
  a file system.
  """
  parser.add_option(
      '-o', '--outdir', metavar='DIR',
      help='Directory used to recreate the tree.')
2362
2363
def process_outdir_options(parser, options, cwd):
  """Validates --outdir and rewrites it in place as an absolute native path.

  Aborts via parser.error() when the value is missing or is an URL.
  """
  outdir = options.outdir
  if not outdir:
    parser.error('--outdir is required.')
  if file_path.is_url(outdir):
    parser.error('Can\'t use an URL for --outdir.')
  # Switch to the platform's separator; outdir doesn't need native path case
  # since tracing is never done from there.
  outdir = unicode(outdir).replace('/', os.path.sep)
  options.outdir = os.path.abspath(os.path.normpath(os.path.join(cwd, outdir)))
  # In theory, we'd create the directory outdir right away. Defer doing it in
  # case there's errors in the command line.
2375
2376
class OptionParserIsolateServer(tools.OptionParserWithLogging):
  """Option parser shared by all commands; registers the auth options on top of
  the logging ones provided by the base class.
  """

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self,
        version=__version__,
        prog=os.path.basename(sys.modules[__name__].__file__),
        **kwargs)
    # Registers the authentication related options on this parser.
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    # Let the base class consume its options first, then handle the
    # authentication ones added in __init__.
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    auth.process_auth_options(self, options)
    return options, args
2391
2392
def main(args):
  """Dispatches |args| to the matching CMD* handler in this module."""
  return subcommand.CommandDispatcher(__name__).execute(
      OptionParserIsolateServer(), args)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00002396
2397
if __name__ == '__main__':
  # Fix the I/O streams encoding before anything is printed.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  # Enables ANSI color escape sequence handling (notably on Windows consoles).
  colorama.init()
  sys.exit(main(sys.argv[1:]))