blob: 25bf34a512f12e87bc508696089567b88d1066c4 [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00005
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05006"""Archives a set of files or directories to a server."""
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00007
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -04008__version__ = '0.3.4'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +000010import functools
maruel@chromium.org41601642013-09-18 19:40:46 +000011import json
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000012import logging
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000013import os
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000014import re
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050015import shutil
16import stat
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000017import sys
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050018import tempfile
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000019import threading
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000020import time
maruel@chromium.orge82112e2013-04-24 14:41:55 +000021import urllib
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -050022import urlparse
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000023import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000024
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000025from third_party import colorama
26from third_party.depot_tools import fix_encoding
27from third_party.depot_tools import subcommand
28
Marc-Antoine Ruel37989932013-11-19 16:28:08 -050029from utils import file_path
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000030from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040031from utils import on_error
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000032from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000033from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000034
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080035import auth
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040036import isolated_format
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080037
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000038
# Version of isolate protocol passed to the server in /handshake request.
ISOLATE_PROTOCOL_VERSION = '1.0'


# The number of files to check the isolate server per /pre-upload query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: larger the file -> larger the
# possibility it has changed). Then first ITEMS_PER_CONTAINS_QUERIES[0] files
# are taken and send to '/pre-upload', then next ITEMS_PER_CONTAINS_QUERIES[1],
# and so on. Numbers here is a trade-off; the more per request, the lower the
# effect of HTTP round trip latency and TCP-level chattiness. On the other hand,
# larger values cause longer lookups, increasing the initial latency to start
# uploading, which is especially an issue for large files. This value is
# optimized for the "few thousands files to look up with minimal number of large
# files missing" case.
ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]


# A list of already compressed extension types that should not receive any
# compression before being uploaded.
# NOTE: entries are bare extensions without a leading dot (e.g. 'jpg').
ALREADY_COMPRESSED_TYPES = [
    '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'mp4', 'pdf',
    'png', 'wav', 'zip',
]


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# Chunk size to use when reading from network stream.
NET_IO_FILE_CHUNK = 16 * 1024


# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout whole download will be aborted.
DOWNLOAD_READ_TIMEOUT = 60

# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30


# Regexps of files to never archive.
# NOTE(review): these appear to be matched against OS-native relative paths;
# confirm against the call sites that consume DEFAULT_BLACKLIST.
DEFAULT_BLACKLIST = (
    # Temporary vim or python files.
    r'^.+\.(?:pyc|swp)$',
    # .git or .svn directory.
    r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
)


# Chromium-specific.
DEFAULT_BLACKLIST += (
    r'^.+\.(?:run_test_cases)$',
    r'^(?:.+' + re.escape(os.path.sep) + r'|)testserver\.log$',
)
103
104
class Error(Exception):
  """Base class for generic runtime failures raised by this script."""
108
109
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be loaded."""
113
114
class MappingError(OSError):
  """Raised when the file tree could not be recreated on disk."""
118
119
def stream_read(stream, chunk_size):
  """Reads chunks of up to |chunk_size| bytes from |stream| and yields them.

  Stops at the first empty read (end of stream).
  """
  data = stream.read(chunk_size)
  while data:
    yield data
    data = stream.read(chunk_size)
127
128
def file_read(filepath, chunk_size=isolated_format.DISK_FILE_CHUNK, offset=0):
  """Yields file content in chunks of |chunk_size| starting from |offset|."""
  with open(filepath, 'rb') as f:
    # Skip the prefix the caller is not interested in.
    if offset:
      f.seek(offset)
    data = f.read(chunk_size)
    while data:
      yield data
      data = f.read(chunk_size)
139
140
def file_write(filepath, content_generator):
  """Writes file content as generated by content_generator.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  parent_dir = os.path.dirname(filepath)
  if not os.path.isdir(parent_dir):
    os.makedirs(parent_dir)
  written = 0
  with open(filepath, 'wb') as out:
    for chunk in content_generator:
      out.write(chunk)
      written += len(chunk)
  return written
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000159
160
def zip_compress(content_generator, level=7):
  """Reads chunks from |content_generator| and yields zip compressed chunks."""
  compressor = zlib.compressobj(level)
  for piece in content_generator:
    out = compressor.compress(piece)
    # zlib buffers internally; only yield when it actually emitted bytes.
    if out:
      yield out
  remaining = compressor.flush(zlib.Z_FINISH)
  if remaining:
    yield remaining
171
172
def zip_decompress(
    content_generator, chunk_size=isolated_format.DISK_FILE_CHUNK):
  """Reads zipped data from |content_generator| and yields decompressed data.

  Decompresses data in small chunks (no larger than |chunk_size|) so that
  zip bomb file doesn't cause zlib to preallocate huge amount of memory.

  Raises IOError if data is corrupted or incomplete.
  """
  decompressor = zlib.decompressobj()
  # Total compressed bytes consumed so far; only used in the error message.
  compressed_size = 0
  try:
    for chunk in content_generator:
      compressed_size += len(chunk)
      # Passing |chunk_size| caps the output of a single decompress() call;
      # anything that doesn't fit is kept in |unconsumed_tail|.
      data = decompressor.decompress(chunk, chunk_size)
      if data:
        yield data
      # Drain the tail in bounded slices before reading more input.
      while decompressor.unconsumed_tail:
        data = decompressor.decompress(decompressor.unconsumed_tail, chunk_size)
        if data:
          yield data
    # Emit whatever zlib still holds after the input is exhausted.
    tail = decompressor.flush()
    if tail:
      yield tail
  except zlib.error as e:
    raise IOError(
        'Corrupted zip stream (read %d bytes) - %s' % (compressed_size, e))
  # Ensure all data was read and decompressed.
  if decompressor.unused_data or decompressor.unconsumed_tail:
    raise IOError('Not all data was decompressed')
203
204
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (store, no compression) for file types that are already
  compressed, 7 for everything else.
  """
  # os.path.splitext() keeps the leading dot ('.jpg') while
  # ALREADY_COMPRESSED_TYPES lists bare extensions ('jpg'); strip the dot so
  # the membership test can actually match. Without this, every file was
  # recompressed at level 7 regardless of type.
  file_ext = os.path.splitext(filename)[1].lstrip('.').lower()
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
210
211
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files."""
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect every ancestor directory of every file, relative to the base.
  required = set()
  for relpath in files:
    parent = os.path.dirname(relpath)
    while parent and parent not in required:
      required.add(parent)
      parent = os.path.dirname(parent)
  # Sorted order guarantees parents are created before their children.
  for d in sorted(required):
    os.mkdir(os.path.join(base_directory, d))
224
225
def create_symlinks(base_directory, files):
  """Creates any symlinks needed by the given set of files.

  Entries without an 'l' property are ignored.
  """
  symlink_entries = (
      (path, props) for path, props in files if 'l' in props)
  for path, props in symlink_entries:
    if sys.platform == 'win32':
      # TODO(maruel): Create symlink via the win32 api.
      logging.warning('Ignoring symlink %s', path)
    else:
      # os.symlink() doesn't exist on Windows.
      destination = os.path.join(base_directory, path)
      os.symlink(props['l'], destination)  # pylint: disable=E1101
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000238
239
def is_valid_file(filepath, size):
  """Determines if the given files appears valid.

  Currently it just checks the file's size.
  """
  # Unknown expected size: only existence can be verified.
  if size == UNKNOWN_FILE_SIZE:
    return os.path.isfile(filepath)
  actual_size = os.stat(filepath).st_size
  if size == actual_size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), actual_size, size)
  return False
254
255
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """Thread pool that automatically retries on IOError and runs a preconfigured
  function.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # How many times a task is re-run on IOError before giving up.
  RETRIES = 5

  def __init__(self):
    # Positional arguments of AutoRetryThreadPool:
    # exception types to retry on, retry count, initial workers, max workers,
    # queue size (0), thread name prefix.
    super(WorkerPool, self).__init__(
        [IOError],
        self.RETRIES,
        self.INITIAL_WORKERS,
        self.MAX_WORKERS,
        0,
        'remote')
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000273
274
class Item(object):
  """An item to push to Storage.

  Its digest and size may be provided in advance, if known. Otherwise they will
  be derived from content(). If digest is provided, it MUST correspond to
  hash algorithm used by Storage.

  When used with Storage, Item starts its life in a main thread, travels
  to 'contains' thread, then to 'push' thread and then finally back to
  the main thread. It is never used concurrently from multiple threads.
  """

  def __init__(self, digest=None, size=None, high_priority=False):
    # Hex digest of the content; filled in by prepare() when not given.
    self.digest = digest
    # Content size in bytes; filled in by prepare() when not given.
    self.size = size
    # True to upload this item ahead of regular-priority ones.
    self.high_priority = high_priority
    # zlib compression level used when the namespace compresses uploads.
    self.compression_level = 6

  def content(self):
    """Iterable with content of this item as byte string (str) chunks."""
    raise NotImplementedError()

  def prepare(self, hash_algo):
    """Ensures self.digest and self.size are set.

    Uses content() as a source of data to calculate them. Does nothing if digest
    and size is already known.

    Arguments:
      hash_algo: hash algorithm to use to calculate digest.
    """
    if self.digest is not None and self.size is not None:
      return
    hasher = hash_algo()
    byte_count = 0
    for chunk in self.content():
      hasher.update(chunk)
      byte_count += len(chunk)
    self.digest = hasher.hexdigest()
    self.size = byte_count
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000314
315
class FileItem(Item):
  """A file to push to Storage.

  Its digest and size may be provided in advance, if known. Otherwise they will
  be derived from the file content.
  """

  def __init__(self, path, digest=None, size=None, high_priority=False):
    # Size is cheap to read from the filesystem; digest stays lazy.
    if size is None:
      size = os.stat(path).st_size
    super(FileItem, self).__init__(digest, size, high_priority)
    self.path = path
    # Already-compressed file types are stored without recompression.
    self.compression_level = get_zip_compression_level(path)

  def content(self):
    # Stream the file lazily instead of loading it whole.
    return file_read(self.path)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000333
334
class BufferItem(Item):
  """A byte buffer to push to Storage."""

  def __init__(self, buf, high_priority=False):
    # Size is known immediately from len(buf); digest is left for prepare().
    super(BufferItem, self).__init__(None, len(buf), high_priority)
    self.buffer = buf

  def content(self):
    # The whole buffer is a single chunk.
    return [self.buffer]
344
345
class Storage(object):
  """Efficiently downloads or uploads large set of files via StorageApi.

  Implements compression support, parallel 'contains' checks, parallel uploads
  and more.

  Works only within single namespace (and thus hashing algorithm and compression
  scheme are fixed).

  Spawns multiple internal threads. Thread safe, but not fork safe.
  """

  def __init__(self, storage_api):
    # Low-level transport (e.g. IsolateServer or FileSystem backend).
    self._storage_api = storage_api
    # Whether items are zlib-compressed in transit; derived from the namespace.
    self._use_zip = is_namespace_with_compression(storage_api.namespace)
    self._hash_algo = isolated_format.get_hash_algo(storage_api.namespace)
    # Thread pools are created lazily on first use; see properties below.
    self._cpu_thread_pool = None
    self._net_thread_pool = None

  @property
  def hash_algo(self):
    """Hashing algorithm used to name files in storage based on their content.

    Defined by |namespace|. See also isolated_format.get_hash_algo().
    """
    return self._hash_algo

  @property
  def location(self):
    """Location of a backing store that this class is using.

    Exact meaning depends on the storage_api type. For IsolateServer it is
    an URL of isolate server, for FileSystem is it a path in file system.
    """
    return self._storage_api.location

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    return self._storage_api.namespace

  @property
  def cpu_thread_pool(self):
    """ThreadPool for CPU-bound tasks like zipping."""
    if self._cpu_thread_pool is None:
      self._cpu_thread_pool = threading_utils.ThreadPool(
          2, max(threading_utils.num_processors(), 2), 0, 'zip')
    return self._cpu_thread_pool

  @property
  def net_thread_pool(self):
    """AutoRetryThreadPool for IO-bound tasks, retries IOError."""
    if self._net_thread_pool is None:
      self._net_thread_pool = WorkerPool()
    return self._net_thread_pool

  def close(self):
    """Waits for all pending tasks to finish."""
    if self._cpu_thread_pool:
      self._cpu_thread_pool.join()
      self._cpu_thread_pool.close()
      self._cpu_thread_pool = None
    if self._net_thread_pool:
      self._net_thread_pool.join()
      self._net_thread_pool.close()
      self._net_thread_pool = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exc_value, _traceback):
    """Context manager interface."""
    self.close()
    return False

  def upload_items(self, items):
    """Uploads a bunch of items to the isolate server.

    It figures out what items are missing from the server and uploads only them.

    Arguments:
      items: list of Item instances that represents data to upload.

    Returns:
      List of items that were uploaded. All other items are already there.
    """
    # TODO(vadimsh): Optimize special case of len(items) == 1 that is frequently
    # used by swarming.py. There's no need to spawn multiple threads and try to
    # do stuff in parallel: there's nothing to parallelize. 'contains' check and
    # 'push' should be performed sequentially in the context of current thread.

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # For each digest keep only first Item that matches it. All other items
    # are just indistinguishable copies from the point of view of isolate
    # server (it doesn't care about paths at all, only content and digests).
    seen = {}
    duplicates = 0
    for item in items:
      if seen.setdefault(item.digest, item) is not item:
        duplicates += 1
    # dict.values() is a list on Python 2; |items| is iterated several times
    # below (len, sum, set), so a materialized list is required here.
    items = seen.values()
    if duplicates:
      logging.info('Skipped %d duplicated files', duplicates)

    # Enqueue all upload tasks.
    missing = set()
    uploaded = []
    channel = threading_utils.TaskChannel()
    for missing_item, push_state in self.get_missing_items(items):
      missing.add(missing_item)
      self.async_push(channel, missing_item, push_state)

    # No need to spawn deadlock detector thread if there's nothing to upload.
    if missing:
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        # Wait for all started uploads to finish.
        while len(uploaded) != len(missing):
          detector.ping()
          item = channel.pull()
          uploaded.append(item)
          logging.debug(
              'Uploaded %d / %d: %s', len(uploaded), len(missing), item.digest)
      logging.info('All files are uploaded')

    # Print stats.
    total = len(items)
    total_size = sum(f.size for f in items)
    logging.info(
        'Total: %6d, %9.1fkb',
        total,
        total_size / 1024.)
    cache_hit = set(items) - missing
    cache_hit_size = sum(f.size for f in cache_hit)
    logging.info(
        'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_hit),
        cache_hit_size / 1024.,
        len(cache_hit) * 100. / total,
        cache_hit_size * 100. / total_size if total_size else 0)
    cache_miss = missing
    cache_miss_size = sum(f.size for f in cache_miss)
    logging.info(
        'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_miss),
        cache_miss_size / 1024.,
        len(cache_miss) * 100. / total,
        cache_miss_size * 100. / total_size if total_size else 0)

    return uploaded

  def get_fetch_url(self, item):
    """Returns an URL that can be used to fetch given item once it's uploaded.

    Note that if namespace uses compression, data at given URL is compressed.

    Arguments:
      item: Item to get fetch URL for.

    Returns:
      An URL or None if underlying protocol doesn't support this.
    """
    item.prepare(self._hash_algo)
    return self._storage_api.get_fetch_url(item.digest)

  def async_push(self, channel, item, push_state):
    """Starts asynchronous push to the server in a parallel thread.

    Can be used only after |item| was checked for presence on a server with
    'get_missing_items' call. 'get_missing_items' returns |push_state| object
    that contains storage specific information describing how to upload
    the item (for example in case of cloud storage, it is signed upload URLs).

    Arguments:
      channel: TaskChannel that receives back |item| when upload ends.
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      None, but |channel| later receives back |item| when upload ends.
    """
    # Thread pool task priority.
    priority = WorkerPool.HIGH if item.high_priority else WorkerPool.MED

    def push(content):
      """Pushes an Item and returns it to |channel|."""
      item.prepare(self._hash_algo)
      self._storage_api.push(item, push_state, content)
      return item

    # If zipping is not required, just start a push task.
    if not self._use_zip:
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, item.content())
      return

    # If zipping is enabled, zip in a separate thread.
    def zip_and_push():
      # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
      # content right here. It will block until all file is zipped.
      try:
        stream = zip_compress(item.content(), item.compression_level)
        data = ''.join(stream)
      except Exception as exc:
        logging.error('Failed to zip \'%s\': %s', item, exc)
        # Forward the failure to whoever is pulling from |channel|.
        channel.send_exception()
        return
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, [data])
    self.cpu_thread_pool.add_task(priority, zip_and_push)

  def push(self, item, push_state):
    """Synchronously pushes a single item to the server.

    If you need to push many items at once, consider using 'upload_items' or
    'async_push' with instance of TaskChannel.

    Arguments:
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      Pushed item (same object as |item|).
    """
    channel = threading_utils.TaskChannel()
    with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT):
      self.async_push(channel, item, push_state)
      pushed = channel.pull()
      assert pushed is item
    return item

  def async_fetch(self, channel, priority, digest, size, sink):
    """Starts asynchronous fetch from the server in a parallel thread.

    Arguments:
      channel: TaskChannel that receives back |digest| when download ends.
      priority: thread pool task priority for the fetch.
      digest: hex digest of an item to download.
      size: expected size of the item (after decompression).
      sink: function that will be called as sink(generator).
    """
    def fetch():
      try:
        # Prepare reading pipeline.
        stream = self._storage_api.fetch(digest)
        if self._use_zip:
          stream = zip_decompress(stream, isolated_format.DISK_FILE_CHUNK)
        # Run |stream| through verifier that will assert its size.
        verifier = FetchStreamVerifier(stream, size)
        # Verified stream goes to |sink|.
        sink(verifier.run())
      except Exception as err:
        logging.error('Failed to fetch %s: %s', digest, err)
        raise
      return digest

    # Don't bother with zip_thread_pool for decompression. Decompression is
    # really fast and most probably IO bound anyway.
    self.net_thread_pool.add_task_with_channel(channel, priority, fetch)

  def get_missing_items(self, items):
    """Yields items that are missing from the server.

    Issues multiple parallel queries via StorageApi's 'contains' method.

    Arguments:
      items: a list of Item objects to check.

    Yields:
      For each missing item it yields a pair (item, push_state), where:
        * item - Item object that is missing (one of |items|).
        * push_state - opaque object that contains storage specific information
            describing how to upload the item (for example in case of cloud
            storage, it is signed upload URLs). It can later be passed to
            'async_push'.
    """
    channel = threading_utils.TaskChannel()
    pending = 0

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # Enqueue all requests.
    for batch in batch_items_for_check(items):
      self.net_thread_pool.add_task_with_channel(channel, WorkerPool.HIGH,
          self._storage_api.contains, batch)
      pending += 1

    # Yield results as they come in.
    for _ in xrange(pending):
      for missing_item, push_state in channel.pull().iteritems():
        yield missing_item, push_state
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000645
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000646
def batch_items_for_check(items):
  """Groups items into batches for 'exists?' queries against the server.

  Each yielded batch becomes one call to StorageApi's 'contains' method.

  Arguments:
    items: a list of Item objects.

  Yields:
    Lists of Item objects, one list per server query. Items are processed
    largest first; batch sizes follow ITEMS_PER_CONTAINS_QUERIES, sticking
    with its last entry once the list is exhausted.
  """
  batches_emitted = 0
  current_limit = ITEMS_PER_CONTAINS_QUERIES[0]
  batch = []
  for entry in sorted(items, key=lambda x: x.size, reverse=True):
    batch.append(entry)
    if len(batch) < current_limit:
      continue
    yield batch
    batch = []
    batches_emitted += 1
    index = min(batches_emitted, len(ITEMS_PER_CONTAINS_QUERIES) - 1)
    current_limit = ITEMS_PER_CONTAINS_QUERIES[index]
  if batch:
    yield batch
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000673
674
class FetchQueue(object):
  """Fetches items from Storage and places them into LocalCache.

  It manages multiple concurrent fetch operations, acting as a bridge between
  Storage and LocalCache so that neither needs to know about the other.
  """

  def __init__(self, storage, cache):
    self.storage = storage
    self.cache = cache
    self._channel = threading_utils.TaskChannel()
    self._pending = set()
    self._accessed = set()
    self._fetched = cache.cached_set()

  def add(self, digest, size=UNKNOWN_FILE_SIZE, priority=WorkerPool.MED):
    """Starts asynchronous fetch of item |digest|."""
    if digest in self._pending:
      # A fetch for this digest is already in flight.
      return

    # Record the access so verify_all_cached can later confirm it is still
    # present in the cache.
    self._accessed.add(digest)

    if digest in self._fetched:
      # Cache hit: 'touch' bumps the LRU position and returns True if the
      # entry is present and not corrupted.
      if self.cache.touch(digest, size):
        return
      # Entry turned out to be corrupted; drop it and re-download below.
      self._fetched.remove(digest)
      self.cache.evict(digest)

    # TODO(maruel): It should look at the free disk space, the current cache
    # size and the size of the new item on every new item:
    # - Trim the cache as more entries are listed when free disk space is low,
    #   otherwise if the amount of data downloaded during the run > free disk
    #   space, it'll crash.
    # - Make sure there's enough free disk space to fit all dependencies of
    #   this run! If not, abort early.

    self._pending.add(digest)
    self.storage.async_fetch(
        self._channel, priority, digest, size,
        functools.partial(self.cache.write, digest))

  def wait(self, digests):
    """Blocks until at least one digest from |digests| has been retrieved.

    Returns the first digest retrieved.
    """
    # Maybe one of them already landed.
    for wanted in digests:
      if wanted in self._fetched:
        return wanted

    # Every remaining digest must have been scheduled via 'add'.
    assert all(d in self._pending for d in digests), (digests, self._pending)

    # Pull completed fetches until one of the requested digests shows up.
    while self._pending:
      done = self._channel.pull()
      self._pending.remove(done)
      self._fetched.add(done)
      if done in digests:
        return done

    # Unreachable thanks to the assert above.
    raise RuntimeError('Impossible state')

  def inject_local_file(self, path, algo):
    """Adds local file to the cache as if it was fetched from storage."""
    with open(path, 'rb') as f:
      blob = f.read()
    digest = algo(blob).hexdigest()
    self.cache.write(digest, [blob])
    self._fetched.add(digest)
    return digest

  @property
  def pending_count(self):
    """Returns number of items to be fetched."""
    return len(self._pending)

  def verify_all_cached(self):
    """True if every accessed item is still present in the cache."""
    return self._accessed.issubset(self.cache.cached_set())
766
767
class FetchStreamVerifier(object):
  """Verifies that fetched file is valid before passing it to the LocalCache."""

  def __init__(self, stream, expected_size):
    self.stream = stream
    self.expected_size = expected_size
    self.current_size = 0

  def run(self):
    """Generator that re-yields the chunks of |stream| while validating it.

    The stream is checked for completeness before its final chunk is handed
    to the consumer.

    IOError raised by the consumer is converted to MappingError so that
    Storage does not retry the fetch on unrelated local cache errors.
    """
    # Buffer exactly one chunk ahead so the final one can be inspected with
    # is_last=True before the consumer sees it.
    buffered = None
    for incoming in self.stream:
      assert incoming is not None
      if buffered is not None:
        self._inspect_chunk(buffered, is_last=False)
        try:
          yield buffered
        except IOError as exc:
          raise MappingError('Failed to store an item in cache: %s' % exc)
      buffered = incoming
    if buffered is not None:
      self._inspect_chunk(buffered, is_last=True)
      try:
        yield buffered
      except IOError as exc:
        raise MappingError('Failed to store an item in cache: %s' % exc)

  def _inspect_chunk(self, chunk, is_last):
    """Called for each fetched chunk before passing it to consumer."""
    self.current_size += len(chunk)
    size_is_known = self.expected_size != UNKNOWN_FILE_SIZE
    if is_last and size_is_known and (
        self.expected_size != self.current_size):
      raise IOError('Incorrect file size: expected %d, got %d' % (
          self.expected_size, self.current_size))
811
812
class StorageApi(object):
  """Interface for classes that implement low-level storage operations.

  StorageApi is oblivious of compression and hashing scheme used. These details
  are handled in higher level Storage class.

  Clients should generally not use StorageApi directly. Storage class is
  preferred since it implements compression and upload optimizations.
  """

  @property
  def location(self):
    """Location of a backing store that this class is using.

    Exact meaning depends on the type. For IsolateServer it is an URL of
    isolate server, for FileSystem it is a path in the file system.
    """
    raise NotImplementedError()

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    raise NotImplementedError()

  def get_fetch_url(self, digest):
    """Returns an URL that can be used to fetch an item with given digest.

    Arguments:
      digest: hex digest of item to fetch.

    Returns:
      An URL or None if the protocol doesn't support this.
    """
    raise NotImplementedError()

  def fetch(self, digest, offset=0):
    """Fetches an object and yields its content.

    Arguments:
      digest: hash digest of item to download.
      offset: offset (in bytes) from the start of the file to resume fetch from.

    Yields:
      Chunks of downloaded item (as str objects).
    """
    raise NotImplementedError()

  def push(self, item, push_state, content=None):
    """Uploads an |item| with content generated by |content| generator.

    |item| MUST go through 'contains' call to get |push_state| before it can
    be pushed to the storage.

    To be clear, here is one possible usage:
      all_items = [... all items to push as Item subclasses ...]
      for missing_item, push_state in storage_api.contains(all_items).items():
        storage_api.push(missing_item, push_state)

    When pushing to a namespace with compression, data that should be pushed
    and data provided by the item is not the same. In that case |content| is
    not None and it yields chunks of compressed data (using item.content() as
    a source of original uncompressed data). This is implemented by Storage
    class.

    Arguments:
      item: Item object that holds information about an item being pushed.
      push_state: push state object as returned by 'contains' call.
      content: a generator that yields chunks to push, item.content() if None.

    Returns:
      None.
    """
    raise NotImplementedError()

  def contains(self, items):
    """Checks for |items| on the server, prepares missing ones for upload.

    Arguments:
      items: list of Item objects to check for presence.

    Returns:
      A dict missing Item -> opaque push state object to be passed to 'push'.
      See doc string for 'push'.
    """
    raise NotImplementedError()
901
902
class _IsolateServerPushState(object):
  """Per-item state passed from IsolateServer.contains to IsolateServer.push.

  Note this needs to be a global class to support pickling.
  """

  def __init__(self, upload_url, finalize_url):
    # URL to send the item's content to.
    self.upload_url = upload_url
    # Optional URL to notify once the upload completed.
    self.finalize_url = finalize_url
    # Set to True once the content upload succeeded.
    self.uploaded = False
    # Set to True once the finalization call succeeded.
    self.finalized = False
914
915
class IsolateServer(StorageApi):
  """StorageApi implementation that downloads and uploads to Isolate Server.

  It uploads and downloads directly from Google Storage whenever appropriate.
  Works only within single namespace.
  """

  def __init__(self, base_url, namespace):
    """Arguments:
      base_url: URL of the isolate server; must start with 'http'.
      namespace: isolate namespace this instance operates in.
    """
    super(IsolateServer, self).__init__()
    assert base_url.startswith('http'), base_url
    self._base_url = base_url.rstrip('/')
    self._namespace = namespace
    # Protects lazy initialization of |_server_caps| in _server_capabilities.
    self._lock = threading.Lock()
    # Handshake response dict, filled on first access to _server_capabilities.
    self._server_caps = None

  @staticmethod
  def _generate_handshake_request():
    """Returns a dict to be sent as handshake request body."""
    # TODO(vadimsh): Set 'pusher' and 'fetcher' according to intended usage.
    return {
        'client_app_version': __version__,
        'fetcher': True,
        'protocol_version': ISOLATE_PROTOCOL_VERSION,
        'pusher': True,
    }

  @staticmethod
  def _validate_handshake_response(caps):
    """Validates and normalizes handshake response."""
    logging.info('Protocol version: %s', caps['protocol_version'])
    logging.info('Server version: %s', caps['server_app_version'])
    if caps.get('error'):
      raise MappingError(caps['error'])
    if not caps['access_token']:
      raise ValueError('access_token is missing')
    return caps

  @property
  def _server_capabilities(self):
    """Performs handshake with the server if not yet done.

    Returns:
      Server capabilities dictionary as returned by /handshake endpoint.

    Raises:
      MappingError if server rejects the handshake.
    """
    # TODO(maruel): Make this request much earlier asynchronously while the
    # files are being enumerated.

    # TODO(vadimsh): Put |namespace| in the URL so that server can apply
    # namespace-level ACLs to this call.
    with self._lock:
      if self._server_caps is None:
        try:
          caps = net.url_read_json(
              url=self._base_url + '/content-gs/handshake',
              data=self._generate_handshake_request())
          if caps is None:
            raise MappingError('Failed to perform handshake.')
          if not isinstance(caps, dict):
            raise ValueError('Expecting JSON dict')
          self._server_caps = self._validate_handshake_response(caps)
        except (ValueError, KeyError, TypeError) as exc:
          # KeyError exception has very confusing str conversion: it's just a
          # missing key value and nothing else. So print exception class name
          # as well.
          raise MappingError('Invalid handshake response (%s): %s' % (
              exc.__class__.__name__, exc))
      return self._server_caps

  @property
  def location(self):
    """URL of the isolate server (without trailing slash)."""
    return self._base_url

  @property
  def namespace(self):
    """Isolate namespace this instance was constructed with."""
    return self._namespace

  def get_fetch_url(self, digest):
    """Returns the /content-gs/retrieve URL for |digest| in this namespace."""
    assert isinstance(digest, basestring)
    return '%s/content-gs/retrieve/%s/%s' % (
        self._base_url, self._namespace, digest)

  def fetch(self, digest, offset=0):
    """Downloads item |digest|, optionally resuming from byte |offset|.

    When |offset| is non-zero, validates the server's Content-Range header
    before trusting the response. See StorageApi.fetch for the contract.
    """
    source_url = self.get_fetch_url(digest)
    logging.debug('download_file(%s, %d)', source_url, offset)

    connection = net.url_open(
        source_url,
        read_timeout=DOWNLOAD_READ_TIMEOUT,
        headers={'Range': 'bytes=%d-' % offset} if offset else None)

    if not connection:
      raise IOError('Request failed - %s' % source_url)

    # If |offset| is used, verify server respects it by checking Content-Range.
    if offset:
      content_range = connection.get_header('Content-Range')
      if not content_range:
        raise IOError('Missing Content-Range header')

      # 'Content-Range' format is 'bytes <offset>-<last_byte_index>/<size>'.
      # According to a spec, <size> can be '*' meaning "Total size of the file
      # is not known in advance".
      try:
        match = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', content_range)
        if not match:
          raise ValueError()
        content_offset = int(match.group(1))
        last_byte_index = int(match.group(2))
        size = None if match.group(3) == '*' else int(match.group(3))
      except ValueError:
        raise IOError('Invalid Content-Range header: %s' % content_range)

      # Ensure returned offset equals requested one.
      if offset != content_offset:
        raise IOError('Expecting offset %d, got %d (Content-Range is %s)' % (
            offset, content_offset, content_range))

      # Ensure entire tail of the file is returned.
      if size is not None and last_byte_index + 1 != size:
        raise IOError('Incomplete response. Content-Range: %s' % content_range)

    return stream_read(connection, NET_IO_FILE_CHUNK)

  def push(self, item, push_state, content=None):
    """Uploads |item| using upload URLs stored in |push_state|.

    Two-phase: PUT the content to push_state.upload_url (skipped on retry if
    already uploaded), then optionally POST to push_state.finalize_url.
    See StorageApi.push for the full contract.
    """
    assert isinstance(item, Item)
    assert item.digest is not None
    assert item.size is not None
    assert isinstance(push_state, _IsolateServerPushState)
    assert not push_state.finalized

    # Default to item.content().
    content = item.content() if content is None else content

    # Do not iterate byte by byte over 'str'. Push it all as a single chunk.
    if isinstance(content, basestring):
      assert not isinstance(content, unicode), 'Unicode string is not allowed'
      content = [content]

    # TODO(vadimsh): Do not read from |content| generator when retrying push.
    # If |content| is indeed a generator, it can not be re-winded back
    # to the beginning of the stream. A retry will find it exhausted. A possible
    # solution is to wrap |content| generator with some sort of caching
    # restartable generator. It should be done alongside streaming support
    # implementation.

    # This push operation may be a retry after failed finalization call below,
    # no need to reupload contents in that case.
    if not push_state.uploaded:
      # A cheezy way to avoid memcpy of (possibly huge) file, until streaming
      # upload support is implemented.
      if isinstance(content, list) and len(content) == 1:
        content = content[0]
      else:
        content = ''.join(content)
      # PUT file to |upload_url|.
      response = net.url_read(
          url=push_state.upload_url,
          data=content,
          content_type='application/octet-stream',
          method='PUT')
      if response is None:
        raise IOError('Failed to upload a file %s to %s' % (
            item.digest, push_state.upload_url))
      push_state.uploaded = True
    else:
      logging.info(
          'A file %s already uploaded, retrying finalization only', item.digest)

    # Optionally notify the server that it's done.
    if push_state.finalize_url:
      # TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
      # send it to isolated server. That way isolate server can verify that
      # the data safely reached Google Storage (GS provides MD5 and CRC32C of
      # stored files).
      # TODO(maruel): Fix the server to accept proper data={} so
      # url_read_json() can be used.
      response = net.url_read(
          url=push_state.finalize_url,
          data='',
          content_type='application/json',
          method='POST')
      if response is None:
        raise IOError('Failed to finalize an upload of %s' % item.digest)
      push_state.finalized = True

  def contains(self, items):
    """Queries /pre-upload to find which of |items| are missing on the server.

    Returns a dict {missing Item: _IsolateServerPushState} to pass to 'push'.
    See StorageApi.contains for the full contract.
    """
    logging.info('Checking existence of %d files...', len(items))

    # Ensure all items were initialized with 'prepare' call. Storage does that.
    assert all(i.digest is not None and i.size is not None for i in items)

    # Request body is a json encoded list of dicts.
    body = [
        {
          'h': item.digest,
          's': item.size,
          'i': int(item.high_priority),
        } for item in items
    ]

    query_url = '%s/content-gs/pre-upload/%s?token=%s' % (
        self._base_url,
        self._namespace,
        urllib.quote(self._server_capabilities['access_token']))

    # Response body is a list of push_urls (or null if file is already present).
    response = None
    try:
      response = net.url_read_json(url=query_url, data=body)
      if response is None:
        raise MappingError('Failed to execute /pre-upload query')
      if not isinstance(response, list):
        raise ValueError('Expecting response with json-encoded list')
      if len(response) != len(items):
        raise ValueError(
            'Incorrect number of items in the list, expected %d, '
            'but got %d' % (len(items), len(response)))
    except ValueError as err:
      raise MappingError(
          'Invalid response from server: %s, body is %s' % (err, response))

    # Pick Items that are missing, attach _PushState to them.
    missing_items = {}
    for i, push_urls in enumerate(response):
      if push_urls:
        assert len(push_urls) == 2, str(push_urls)
        missing_items[items[i]] = _IsolateServerPushState(
            push_urls[0], push_urls[1])
    logging.info('Queried %d files, %d cache hit',
        len(items), len(items) - len(missing_items))
    return missing_items
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001150
1151
class FileSystem(StorageApi):
  """StorageApi implementation that fetches data from the file system.

  The common use case is a NFS/CIFS file server that is mounted locally and
  used to fetch files onto a local partition.
  """

  # Sentinel used as push_state instead of None so that callers are forced to
  # go through 'contains' before 'push'; naively passing None will not work.
  _DUMMY_PUSH_STATE = object()

  def __init__(self, base_path, namespace):
    super(FileSystem, self).__init__()
    self._base_path = base_path
    self._namespace = namespace

  @property
  def location(self):
    return self._base_path

  @property
  def namespace(self):
    return self._namespace

  def get_fetch_url(self, digest):
    # Plain file system storage has no fetch URL scheme.
    return None

  def fetch(self, digest, offset=0):
    assert isinstance(digest, basestring)
    return file_read(os.path.join(self._base_path, digest), offset=offset)

  def push(self, item, push_state, content=None):
    assert isinstance(item, Item)
    assert item.digest is not None
    assert item.size is not None
    assert push_state is self._DUMMY_PUSH_STATE
    if content is None:
      content = item.content()
    if isinstance(content, basestring):
      assert not isinstance(content, unicode), 'Unicode string is not allowed'
      content = [content]
    file_write(os.path.join(self._base_path, item.digest), content)

  def contains(self, items):
    assert all(i.digest is not None and i.size is not None for i in items)
    # An item is "missing" when no file with its digest exists on disk.
    missing = {}
    for item in items:
      if not os.path.exists(os.path.join(self._base_path, item.digest)):
        missing[item] = self._DUMMY_PUSH_STATE
    return missing
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001200
1201
class LocalCache(object):
  """Local cache that stores objects fetched via Storage.

  It can be accessed concurrently from multiple threads, so it should protect
  its internal state with some lock.
  """
  # Path to the directory holding the cache, or None if the implementation
  # does not keep files on disk (e.g. MemoryCache).
  cache_dir = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface."""
    # Returning False propagates any exception raised inside the 'with' block.
    return False

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Ensures item is not corrupted and updates its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Removes item from cache if it's there."""
    raise NotImplementedError()

  def read(self, digest):
    """Returns contents of the cached item as a single str."""
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache."""
    raise NotImplementedError()

  def hardlink(self, digest, dest, file_mode):
    """Ensures file at |dest| has same content as cached |digest|.

    If file_mode is provided, it is used to set the executable bit if
    applicable.
    """
    raise NotImplementedError()
1253
1254
1255class MemoryCache(LocalCache):
1256 """LocalCache implementation that stores everything in memory."""
1257
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001258 def __init__(self, file_mode_mask=0500):
1259 """Args:
1260 file_mode_mask: bit mask to AND file mode with. Default value will make
1261 all mapped files to be read only.
1262 """
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001263 super(MemoryCache, self).__init__()
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001264 self._file_mode_mask = file_mode_mask
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001265 # Let's not assume dict is thread safe.
1266 self._lock = threading.Lock()
1267 self._contents = {}
1268
1269 def cached_set(self):
1270 with self._lock:
1271 return set(self._contents)
1272
1273 def touch(self, digest, size):
1274 with self._lock:
1275 return digest in self._contents
1276
1277 def evict(self, digest):
1278 with self._lock:
1279 self._contents.pop(digest, None)
1280
1281 def read(self, digest):
1282 with self._lock:
1283 return self._contents[digest]
1284
1285 def write(self, digest, content):
1286 # Assemble whole stream before taking the lock.
1287 data = ''.join(content)
1288 with self._lock:
1289 self._contents[digest] = data
1290
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001291 def hardlink(self, digest, dest, file_mode):
1292 """Since data is kept in memory, there is no filenode to hardlink."""
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001293 file_write(dest, [self.read(digest)])
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001294 if file_mode is not None:
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001295 os.chmod(dest, file_mode & self._file_mode_mask)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001296
1297
def is_namespace_with_compression(namespace):
  """Returns True if given |namespace| stores compressed objects."""
  compressed_suffixes = ('-gzip', '-deflate')
  return any(namespace.endswith(suffix) for suffix in compressed_suffixes)
1301
1302
def get_storage_api(file_or_url, namespace):
  """Returns an object that implements low-level StorageApi interface.

  It is used by Storage to work with a single isolate |namespace|. Clients
  should rarely use it directly; see 'get_storage' for a better alternative.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of
        isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in; also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of StorageApi subclass.
  """
  if file_path.is_url(file_or_url):
    return IsolateServer(file_or_url, namespace)
  return FileSystem(file_or_url, namespace)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001324
1325
def get_storage(file_or_url, namespace):
  """Returns Storage class that can upload and download from |namespace|.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of
        isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in; also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of Storage.
  """
  storage_api = get_storage_api(file_or_url, namespace)
  return Storage(storage_api)
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001340
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001341
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001342def expand_symlinks(indir, relfile):
1343 """Follows symlinks in |relfile|, but treating symlinks that point outside the
1344 build tree as if they were ordinary directories/files. Returns the final
1345 symlink-free target and a list of paths to symlinks encountered in the
1346 process.
1347
1348 The rule about symlinks outside the build tree is for the benefit of the
1349 Chromium OS ebuild, which symlinks the output directory to an unrelated path
1350 in the chroot.
1351
1352 Fails when a directory loop is detected, although in theory we could support
1353 that case.
1354 """
1355 is_directory = relfile.endswith(os.path.sep)
1356 done = indir
1357 todo = relfile.strip(os.path.sep)
1358 symlinks = []
1359
1360 while todo:
1361 pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
1362 done, todo)
1363 if not symlink:
1364 todo = file_path.fix_native_path_case(done, todo)
1365 done = os.path.join(done, todo)
1366 break
1367 symlink_path = os.path.join(done, pre_symlink, symlink)
1368 post_symlink = post_symlink.lstrip(os.path.sep)
1369 # readlink doesn't exist on Windows.
1370 # pylint: disable=E1101
1371 target = os.path.normpath(os.path.join(done, pre_symlink))
1372 symlink_target = os.readlink(symlink_path)
1373 if os.path.isabs(symlink_target):
1374 # Absolute path are considered a normal directories. The use case is
1375 # generally someone who puts the output directory on a separate drive.
1376 target = symlink_target
1377 else:
1378 # The symlink itself could be using the wrong path case.
1379 target = file_path.fix_native_path_case(target, symlink_target)
1380
1381 if not os.path.exists(target):
1382 raise MappingError(
1383 'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
1384 target = file_path.get_native_path_case(target)
1385 if not file_path.path_starts_with(indir, target):
1386 done = symlink_path
1387 todo = post_symlink
1388 continue
1389 if file_path.path_starts_with(target, symlink_path):
1390 raise MappingError(
1391 'Can\'t map recursive symlink reference %s -> %s' %
1392 (symlink_path, target))
1393 logging.info('Found symlink: %s -> %s', symlink_path, target)
1394 symlinks.append(os.path.relpath(symlink_path, indir))
1395 # Treat the common prefix of the old and new paths as done, and start
1396 # scanning again.
1397 target = target.split(os.path.sep)
1398 symlink_path = symlink_path.split(os.path.sep)
1399 prefix_length = 0
1400 for target_piece, symlink_path_piece in zip(target, symlink_path):
1401 if target_piece == symlink_path_piece:
1402 prefix_length += 1
1403 else:
1404 break
1405 done = os.path.sep.join(target[:prefix_length])
1406 todo = os.path.join(
1407 os.path.sep.join(target[prefix_length:]), post_symlink)
1408
1409 relfile = os.path.relpath(done, indir)
1410 relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
1411 return relfile, symlinks
1412
1413
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: root directory the inputs are based in.
    relfile: path relative to |indir| to expand; a trailing os.path.sep marks
        a directory to recurse into.
    blacklist: optional callable taking a relative path; truthy return value
        excludes that entry (directories only; explicit files are always kept).
    follow_symlinks: if True, symlinks are resolved via expand_symlinks() and
        reported alongside the files.

  Returns:
    List of relative paths: encountered symlinks followed by the file(s).

  Raises:
    MappingError: |relfile| is absolute, escapes |indir|, has the wrong path
        case, is a directory without a trailing slash, or doesn't exist.
  """
  if os.path.isabs(relfile):
    raise MappingError('Can\'t map absolute path %s' % relfile)

  infile = file_path.normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

  # Reject paths whose on-disk case differs from the requested case, except on
  # platforms where that cannot be enforced (see the long note below).
  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + '.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      # ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree. We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    relfile, symlinks = expand_symlinks(indir, relfile)

  if relfile.endswith(os.path.sep):
    if not os.path.isdir(infile):
      raise MappingError(
          '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith('.' + os.path.sep):
      relfile = relfile[2:]
    # NOTE: |outfiles| aliases |symlinks|; the recursion below extends it in
    # place so the symlinks come first in the returned list.
    outfiles = symlinks
    try:
      for filename in os.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if os.path.isdir(os.path.join(indir, inner_relfile)):
          # Append the separator so the recursive call treats it as a dir.
          inner_relfile += os.path.sep
        outfiles.extend(
            expand_directory_and_symlink(indir, inner_relfile, blacklist,
                                         follow_symlinks))
      return outfiles
    except OSError as e:
      raise MappingError(
          'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise MappingError(
          'Input directory %s must have a trailing slash' % infile)

    if not os.path.isfile(infile):
      raise MappingError('Input file %s doesn\'t exist' % infile)

    return symlinks + [relfile]
1492
1493
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001494def process_input(filepath, prevdict, read_only, algo):
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001495 """Processes an input file, a dependency, and return meta data about it.
1496
1497 Behaviors:
1498 - Retrieves the file mode, file size, file timestamp, file link
1499 destination if it is a file link and calcultate the SHA-1 of the file's
1500 content if the path points to a file and not a symlink.
1501
1502 Arguments:
1503 filepath: File to act on.
1504 prevdict: the previous dictionary. It is used to retrieve the cached sha-1
1505 to skip recalculating the hash. Optional.
Marc-Antoine Ruel7124e392014-01-09 11:49:21 -05001506 read_only: If 1 or 2, the file mode is manipulated. In practice, only save
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001507 one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
1508 windows, mode is not set since all files are 'executable' by
1509 default.
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001510 algo: Hashing algorithm used.
1511
1512 Returns:
1513 The necessary data to create a entry in the 'files' section of an .isolated
1514 file.
1515 """
1516 out = {}
1517 # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
1518 # if prevdict.get('T') == True:
1519 # # The file's content is ignored. Skip the time and hard code mode.
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001520 # out['s'] = 0
1521 # out['h'] = algo().hexdigest()
1522 # out['T'] = True
1523 # return out
1524
1525 # Always check the file stat and check if it is a link. The timestamp is used
1526 # to know if the file's content/symlink destination should be looked into.
1527 # E.g. only reuse from prevdict if the timestamp hasn't changed.
1528 # There is the risk of the file's timestamp being reset to its last value
1529 # manually while its content changed. We don't protect against that use case.
1530 try:
1531 filestats = os.lstat(filepath)
1532 except OSError:
1533 # The file is not present.
1534 raise MappingError('%s is missing' % filepath)
1535 is_link = stat.S_ISLNK(filestats.st_mode)
1536
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001537 if sys.platform != 'win32':
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001538 # Ignore file mode on Windows since it's not really useful there.
1539 filemode = stat.S_IMODE(filestats.st_mode)
1540 # Remove write access for group and all access to 'others'.
1541 filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
1542 if read_only:
1543 filemode &= ~stat.S_IWUSR
1544 if filemode & stat.S_IXUSR:
1545 filemode |= stat.S_IXGRP
1546 else:
1547 filemode &= ~stat.S_IXGRP
1548 if not is_link:
1549 out['m'] = filemode
1550
1551 # Used to skip recalculating the hash or link destination. Use the most recent
1552 # update time.
1553 # TODO(maruel): Save it in the .state file instead of .isolated so the
1554 # .isolated file is deterministic.
1555 out['t'] = int(round(filestats.st_mtime))
1556
1557 if not is_link:
1558 out['s'] = filestats.st_size
1559 # If the timestamp wasn't updated and the file size is still the same, carry
1560 # on the sha-1.
1561 if (prevdict.get('t') == out['t'] and
1562 prevdict.get('s') == out['s']):
1563 # Reuse the previous hash if available.
1564 out['h'] = prevdict.get('h')
1565 if not out.get('h'):
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001566 out['h'] = isolated_format.hash_file(filepath, algo)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001567 else:
1568 # If the timestamp wasn't updated, carry on the link destination.
1569 if prevdict.get('t') == out['t']:
1570 # Reuse the previous link destination if available.
1571 out['l'] = prevdict.get('l')
1572 if out.get('l') is None:
1573 # The link could be in an incorrect path case. In practice, this only
1574 # happen on OSX on case insensitive HFS.
1575 # TODO(maruel): It'd be better if it was only done once, in
1576 # expand_directory_and_symlink(), so it would not be necessary to do again
1577 # here.
1578 symlink_value = os.readlink(filepath) # pylint: disable=E1101
1579 filedir = file_path.get_native_path_case(os.path.dirname(filepath))
1580 native_dest = file_path.fix_native_path_case(filedir, symlink_value)
1581 out['l'] = os.path.relpath(native_dest, filedir)
1582 return out
1583
1584
def save_isolated(isolated, data):
  """Writes .isolated file |data| to the path |isolated|.

  Note: this reference implementation does not create child .isolated file so
  it always returns an empty list.

  Returns the list of child isolated files that are included by |isolated|.
  """
  # Round-trip |data| through the parser to ensure it is valid .isolated data
  # before anything hits the disk.
  algo = isolated_format.SUPPORTED_ALGOS[data['algo']]
  serialized = json.dumps(data)
  load_isolated(serialized, algo)
  tools.write_json(isolated, data, True)
  return []
1598
1599
maruel@chromium.org7b844a62013-09-17 13:04:59 +00001600def upload_tree(base_url, indir, infiles, namespace):
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001601 """Uploads the given tree to the given url.
1602
1603 Arguments:
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +00001604 base_url: The base url, it is assume that |base_url|/has/ can be used to
1605 query if an element was already uploaded, and |base_url|/store/
1606 can be used to upload a new element.
1607 indir: Root directory the infiles are based in.
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001608 infiles: dict of files to upload from |indir| to |base_url|.
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +00001609 namespace: The namespace to use on the server.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001610 """
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001611 logging.info('upload_tree(indir=%s, files=%d)', indir, len(infiles))
1612
1613 # Convert |indir| + |infiles| into a list of FileItem objects.
1614 # Filter out symlinks, since they are not represented by items on isolate
1615 # server side.
1616 items = [
1617 FileItem(
1618 path=os.path.join(indir, filepath),
1619 digest=metadata['h'],
1620 size=metadata['s'],
1621 high_priority=metadata.get('priority') == '0')
1622 for filepath, metadata in infiles.iteritems()
1623 if 'l' not in metadata
1624 ]
1625
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001626 with get_storage(base_url, namespace) as storage:
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001627 storage.upload_items(items)
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +00001628 return 0
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001629
1630
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001631def load_isolated(content, algo):
maruel@chromium.org41601642013-09-18 19:40:46 +00001632 """Verifies the .isolated file is valid and loads this object with the json
1633 data.
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001634
1635 Arguments:
1636 - content: raw serialized content to load.
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001637 - algo: hashlib algorithm class. Used to confirm the algorithm matches the
1638 algorithm used on the Isolate Server.
maruel@chromium.org41601642013-09-18 19:40:46 +00001639 """
1640 try:
1641 data = json.loads(content)
1642 except ValueError:
1643 raise ConfigError('Failed to parse: %s...' % content[:100])
1644
1645 if not isinstance(data, dict):
1646 raise ConfigError('Expected dict, got %r' % data)
1647
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001648 # Check 'version' first, since it could modify the parsing after.
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001649 value = data.get('version', '1.0')
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001650 if not isinstance(value, basestring):
1651 raise ConfigError('Expected string, got %r' % value)
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001652 try:
1653 version = tuple(map(int, value.split('.')))
1654 except ValueError:
1655 raise ConfigError('Expected valid version, got %r' % value)
1656
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001657 expected_version = tuple(
1658 map(int, isolated_format.ISOLATED_FILE_VERSION.split('.')))
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001659 # Major version must match.
1660 if version[0] != expected_version[0]:
Marc-Antoine Ruel1c1edd62013-12-06 09:13:13 -05001661 raise ConfigError(
1662 'Expected compatible \'%s\' version, got %r' %
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001663 (isolated_format.ISOLATED_FILE_VERSION, value))
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001664
1665 if algo is None:
Marc-Antoine Ruelac54cb42013-11-18 14:05:35 -05001666 # TODO(maruel): Remove the default around Jan 2014.
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001667 # Default the algorithm used in the .isolated file itself, falls back to
1668 # 'sha-1' if unspecified.
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001669 algo = isolated_format.SUPPORTED_ALGOS_REVERSE[data.get('algo', 'sha-1')]
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001670
maruel@chromium.org41601642013-09-18 19:40:46 +00001671 for key, value in data.iteritems():
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001672 if key == 'algo':
1673 if not isinstance(value, basestring):
1674 raise ConfigError('Expected string, got %r' % value)
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001675 if value not in isolated_format.SUPPORTED_ALGOS:
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001676 raise ConfigError(
1677 'Expected one of \'%s\', got %r' %
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001678 (', '.join(sorted(isolated_format.SUPPORTED_ALGOS)), value))
1679 if value != isolated_format.SUPPORTED_ALGOS_REVERSE[algo]:
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001680 raise ConfigError(
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001681 'Expected \'%s\', got %r' %
1682 (isolated_format.SUPPORTED_ALGOS_REVERSE[algo], value))
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001683
1684 elif key == 'command':
maruel@chromium.org41601642013-09-18 19:40:46 +00001685 if not isinstance(value, list):
1686 raise ConfigError('Expected list, got %r' % value)
1687 if not value:
1688 raise ConfigError('Expected non-empty command')
1689 for subvalue in value:
1690 if not isinstance(subvalue, basestring):
1691 raise ConfigError('Expected string, got %r' % subvalue)
1692
1693 elif key == 'files':
1694 if not isinstance(value, dict):
1695 raise ConfigError('Expected dict, got %r' % value)
1696 for subkey, subvalue in value.iteritems():
1697 if not isinstance(subkey, basestring):
1698 raise ConfigError('Expected string, got %r' % subkey)
1699 if not isinstance(subvalue, dict):
1700 raise ConfigError('Expected dict, got %r' % subvalue)
1701 for subsubkey, subsubvalue in subvalue.iteritems():
1702 if subsubkey == 'l':
1703 if not isinstance(subsubvalue, basestring):
1704 raise ConfigError('Expected string, got %r' % subsubvalue)
1705 elif subsubkey == 'm':
1706 if not isinstance(subsubvalue, int):
1707 raise ConfigError('Expected int, got %r' % subsubvalue)
1708 elif subsubkey == 'h':
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001709 if not isolated_format.is_valid_hash(subsubvalue, algo):
maruel@chromium.org41601642013-09-18 19:40:46 +00001710 raise ConfigError('Expected sha-1, got %r' % subsubvalue)
1711 elif subsubkey == 's':
Marc-Antoine Ruelaab3a622013-11-28 09:47:05 -05001712 if not isinstance(subsubvalue, (int, long)):
1713 raise ConfigError('Expected int or long, got %r' % subsubvalue)
maruel@chromium.org41601642013-09-18 19:40:46 +00001714 else:
1715 raise ConfigError('Unknown subsubkey %s' % subsubkey)
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001716 if bool('h' in subvalue) == bool('l' in subvalue):
maruel@chromium.org41601642013-09-18 19:40:46 +00001717 raise ConfigError(
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001718 'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
1719 subvalue)
1720 if bool('h' in subvalue) != bool('s' in subvalue):
1721 raise ConfigError(
1722 'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
1723 subvalue)
1724 if bool('s' in subvalue) == bool('l' in subvalue):
1725 raise ConfigError(
1726 'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
1727 subvalue)
1728 if bool('l' in subvalue) and bool('m' in subvalue):
1729 raise ConfigError(
1730 'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
maruel@chromium.org41601642013-09-18 19:40:46 +00001731 subvalue)
1732
1733 elif key == 'includes':
1734 if not isinstance(value, list):
1735 raise ConfigError('Expected list, got %r' % value)
1736 if not value:
1737 raise ConfigError('Expected non-empty includes list')
1738 for subvalue in value:
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001739 if not isolated_format.is_valid_hash(subvalue, algo):
maruel@chromium.org41601642013-09-18 19:40:46 +00001740 raise ConfigError('Expected sha-1, got %r' % subvalue)
1741
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001742 elif key == 'os':
1743 if version >= (1, 4):
1744 raise ConfigError('Key \'os\' is not allowed starting version 1.4')
1745
maruel@chromium.org41601642013-09-18 19:40:46 +00001746 elif key == 'read_only':
Marc-Antoine Ruel7124e392014-01-09 11:49:21 -05001747 if not value in (0, 1, 2):
1748 raise ConfigError('Expected 0, 1 or 2, got %r' % value)
maruel@chromium.org41601642013-09-18 19:40:46 +00001749
1750 elif key == 'relative_cwd':
1751 if not isinstance(value, basestring):
1752 raise ConfigError('Expected string, got %r' % value)
1753
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001754 elif key == 'version':
1755 # Already checked above.
1756 pass
1757
maruel@chromium.org41601642013-09-18 19:40:46 +00001758 else:
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001759 raise ConfigError('Unknown key %r' % key)
maruel@chromium.org41601642013-09-18 19:40:46 +00001760
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001761 # Automatically fix os.path.sep if necessary. While .isolated files are always
1762 # in the the native path format, someone could want to download an .isolated
1763 # tree from another OS.
1764 wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
1765 if 'files' in data:
1766 data['files'] = dict(
1767 (k.replace(wrong_path_sep, os.path.sep), v)
1768 for k, v in data['files'].iteritems())
1769 for v in data['files'].itervalues():
1770 if 'l' in v:
1771 v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
1772 if 'relative_cwd' in data:
1773 data['relative_cwd'] = data['relative_cwd'].replace(
1774 wrong_path_sep, os.path.sep)
maruel@chromium.org41601642013-09-18 19:40:46 +00001775 return data
1776
1777
class IsolatedFile(object):
  """A single parsed .isolated file in the tree of includes."""

  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo

    # Becomes True once all the left-side of the tree is parsed. 'Tree' here
    # means the .isolate and all the .isolated files recursively included by
    # it with the 'includes' key. The order of each sha-1 in 'includes', each
    # representing a .isolated file in the hash table, is important, as the
    # later ones are not processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Raw parsed .isolated data.
    self.data = {}
    # One IsolatedFile instance per entry of self.data['includes'].
    self.children = []

    # Flipped to True by load().
    self._is_parsed = False
    # Flipped to True by fetch_files().
    self.files_fetched = False

  def load(self, content):
    """Parses |content| as valid .isolated data and populates this object."""
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, self.algo)
    children = []
    for include_hash in self.data.get('includes', []):
      children.append(IsolatedFile(include_hash, self.algo))
    self.children = children
    self._is_parsed = True

  def fetch_files(self, fetch_queue, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if not self._is_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for path, props in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if path in files:
        continue
      files[path] = props
      if 'h' in props:
        # Preemptively request files.
        logging.debug('fetching %s' % path)
        fetch_queue.add(props['h'], props['s'], WorkerPool.MED)
    self.files_fetched = True
1835
1836
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command to run, merged from the .isolated tree (root wins).
    self.command = []
    # Merged 'files' mapping: relative path -> properties dict.
    self.files = {}
    # Merged 'read_only' value (0, 1, 2) or None if unset.
    self.read_only = None
    # Merged 'relative_cwd'; load() defaults it to '' when unset.
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, fetch_queue, root_isolated_hash, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.
    """
    self.root = IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      # Enqueues one .isolated file for fetching, rejecting include cycles.
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      fetch_queue.add(h, priority=WorkerPool.HIGH)

    retrieve(self.root)

    # Parse each .isolated as soon as it arrives, enqueue its includes, and
    # re-scan the tree to start file fetches as early as possible.
    while pending:
      item_hash = fetch_queue.wait(pending)
      item = pending.pop(item_hash)
      item.load(fetch_queue.cache.read(item_hash))
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(fetch_queue, self.root)

    def check(n):
      # Sanity check: every node in the tree had its files requested.
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''

  def _traverse_tree(self, fetch_queue, node):
    """Recursively requests files of fetchable nodes, left to right.

    Only the first not-yet-fetchable child is unlocked per pass, preserving
    the strict 'includes' ordering.
    """
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(fetch_queue, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(fetch_queue, i)

  def _update_self(self, fetch_queue, node):
    """Merges |node|'s files and settings into self; earlier values win."""
    node.fetch_files(fetch_queue, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      # Ensure paths are correctly separated on windows.
      self.command = node.data['command']
      if self.command:
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1931
1932
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001933def fetch_isolated(isolated_hash, storage, cache, outdir, require_command):
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001934 """Aggressively downloads the .isolated file(s), then download all the files.
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001935
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001936 Arguments:
1937 isolated_hash: hash of the root *.isolated file.
1938 storage: Storage class that communicates with isolate storage.
1939 cache: LocalCache class that knows how to store and map files locally.
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001940 outdir: Output directory to map file tree to.
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001941 require_command: Ensure *.isolated specifies a command to run.
1942
1943 Returns:
1944 Settings object that holds details about loaded *.isolated file.
1945 """
Marc-Antoine Ruel4e8cd182014-06-18 13:27:17 -04001946 logging.debug(
1947 'fetch_isolated(%s, %s, %s, %s, %s)',
1948 isolated_hash, storage, cache, outdir, require_command)
Vadim Shtayurae0ab1902014-04-29 10:55:27 -07001949 # Hash algorithm to use, defined by namespace |storage| is using.
1950 algo = storage.hash_algo
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001951 with cache:
1952 fetch_queue = FetchQueue(storage, cache)
1953 settings = Settings()
1954
1955 with tools.Profiler('GetIsolateds'):
1956 # Optionally support local files by manually adding them to cache.
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001957 if not isolated_format.is_valid_hash(isolated_hash, algo):
Marc-Antoine Ruel4e8cd182014-06-18 13:27:17 -04001958 logging.debug('%s is not a valid hash, assuming a file', isolated_hash)
1959 try:
1960 isolated_hash = fetch_queue.inject_local_file(isolated_hash, algo)
1961 except IOError:
1962 raise MappingError(
1963 '%s doesn\'t seem to be a valid file. Did you intent to pass a '
1964 'valid hash?' % isolated_hash)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001965
1966 # Load all *.isolated and start loading rest of the files.
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001967 settings.load(fetch_queue, isolated_hash, algo)
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001968 if require_command and not settings.command:
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001969 # TODO(vadimsh): All fetch operations are already enqueue and there's no
1970 # easy way to cancel them.
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001971 raise ConfigError('No command to run')
1972
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001973 with tools.Profiler('GetRest'):
1974 # Create file system hierarchy.
1975 if not os.path.isdir(outdir):
1976 os.makedirs(outdir)
1977 create_directories(outdir, settings.files)
Marc-Antoine Ruelccafe0e2013-11-08 16:15:36 -05001978 create_symlinks(outdir, settings.files.iteritems())
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001979
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001980 # Ensure working directory exists.
1981 cwd = os.path.normpath(os.path.join(outdir, settings.relative_cwd))
1982 if not os.path.isdir(cwd):
1983 os.makedirs(cwd)
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001984
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001985 # Multimap: digest -> list of pairs (path, props).
1986 remaining = {}
1987 for filepath, props in settings.files.iteritems():
1988 if 'h' in props:
1989 remaining.setdefault(props['h'], []).append((filepath, props))
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001990
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001991 # Now block on the remaining files to be downloaded and mapped.
1992 logging.info('Retrieving remaining files (%d of them)...',
1993 fetch_queue.pending_count)
1994 last_update = time.time()
1995 with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
1996 while remaining:
1997 detector.ping()
1998
1999 # Wait for any item to finish fetching to cache.
2000 digest = fetch_queue.wait(remaining)
2001
2002 # Link corresponding files to a fetched item in cache.
2003 for filepath, props in remaining.pop(digest):
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05002004 cache.hardlink(
2005 digest, os.path.join(outdir, filepath), props.get('m'))
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00002006
2007 # Report progress.
2008 duration = time.time() - last_update
2009 if duration > DELAY_BETWEEN_UPDATES_IN_SECS:
2010 msg = '%d files remaining...' % len(remaining)
2011 print msg
2012 logging.info(msg)
2013 last_update = time.time()
2014
2015 # Cache could evict some items we just tried to fetch, it's a fatal error.
2016 if not fetch_queue.verify_all_cached():
2017 raise MappingError('Cache is too small to hold all requested files')
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00002018 return settings
2019
2020
def directory_to_metadata(root, algo, blacklist):
  """Returns the FileItem list and .isolated metadata for a directory.

  Arguments:
    root: directory to process; converted to native path case first.
    algo: hash algorithm constructor used to digest the files.
    blacklist: function returning True for relative paths to skip.

  Returns:
    tuple(items, metadata) where |items| is the list of FileItem to upload and
    |metadata| maps each relative path to its .isolated file properties.
  """
  root = file_path.get_native_path_case(root)
  # Symlinks are followed everywhere except on Windows.
  follow_symlinks = sys.platform != 'win32'
  relpaths = expand_directory_and_symlink(
      root, '.' + os.path.sep, blacklist, follow_symlinks)
  metadata = {}
  for relpath in relpaths:
    metadata[relpath] = process_input(
        os.path.join(root, relpath), {}, False, algo)
  # The modification timestamp is not wanted inside the .isolated file.
  for props in metadata.itervalues():
    props.pop('t')
  items = []
  for relpath, props in metadata.iteritems():
    # Entries without a hash (e.g. symlinks) have nothing to upload.
    if 'h' not in props:
      continue
    items.append(
        FileItem(
            path=os.path.join(root, relpath),
            digest=props['h'],
            size=props['s'],
            high_priority=relpath.endswith('.isolated')))
  return items, metadata
2041
2042
def archive_files_to_storage(storage, files, blacklist):
  """Stores every entry and returns the relevant data.

  Arguments:
    storage: a Storage object that communicates with the remote object store.
    files: list of file paths to upload. If a directory is specified, a
        .isolated file is created and its hash is returned.
    blacklist: function that returns True if a file should be omitted.

  Returns:
    List of (hash, path) pairs, one per entry in |files|, in the same order.

  Raises:
    Error on duplicate entries, on a path that is neither a file nor a
    directory, or when a file can't be processed.
  """
  assert all(isinstance(i, unicode) for i in files), files
  if len(files) != len(set(map(os.path.abspath, files))):
    raise Error('Duplicate entries found.')

  results = []
  # The temporary directory is only created as needed.
  tempdir = None
  try:
    # TODO(maruel): Yield the files to a worker thread.
    items_to_upload = []
    for f in files:
      try:
        filepath = os.path.abspath(f)
        if os.path.isdir(filepath):
          # Uploading a whole directory: hash its content, then generate a
          # .isolated file describing it and upload that too.
          items, metadata = directory_to_metadata(
              filepath, storage.hash_algo, blacklist)

          # Create the .isolated file.
          if not tempdir:
            tempdir = tempfile.mkdtemp(prefix='isolateserver')
          handle, isolated = tempfile.mkstemp(dir=tempdir, suffix='.isolated')
          os.close(handle)
          data = {
              'algo':
                  isolated_format.SUPPORTED_ALGOS_REVERSE[storage.hash_algo],
              'files': metadata,
              'version': isolated_format.ISOLATED_FILE_VERSION,
          }
          save_isolated(isolated, data)
          h = isolated_format.hash_file(isolated, storage.hash_algo)
          items_to_upload.extend(items)
          items_to_upload.append(
              FileItem(
                  path=isolated,
                  digest=h,
                  size=os.stat(isolated).st_size,
                  high_priority=True))
          # The directory entry is reported as its generated .isolated hash.
          results.append((h, f))

        elif os.path.isfile(filepath):
          h = isolated_format.hash_file(filepath, storage.hash_algo)
          items_to_upload.append(
              FileItem(
                  path=filepath,
                  digest=h,
                  size=os.stat(filepath).st_size,
                  high_priority=f.endswith('.isolated')))
          results.append((h, f))
        else:
          raise Error('%s is neither a file nor a directory.' % f)
      except OSError as e:
        # Keep the OS-level detail so the user can tell what exactly failed.
        raise Error('Failed to process %s: %s' % (f, e))
    # Technically we would care about which files were uploaded but we don't
    # care much in practice.
    _uploaded_files = storage.upload_items(items_to_upload)
    return results
  finally:
    if tempdir:
      shutil.rmtree(tempdir)
2112
2113
def archive(out, namespace, files, blacklist):
  """Archives |files| to |out| and prints one 'hash path' line per entry.

  Arguments:
    out: isolate server URL or local directory to store the items into.
    namespace: namespace to use on the server.
    files: list of paths; the single entry '-' means read the list from stdin,
        one path per line.
    blacklist: list of regexps of files to skip when handling directories.

  Raises:
    Error when there is nothing to upload or an entry is invalid.
  """
  if files == ['-']:
    # readlines() keeps the trailing newline; strip it, otherwise the paths
    # could never match an existing file. Blank lines are ignored so that an
    # empty entry is not silently resolved to the current directory.
    files = [l.rstrip('\n\r') for l in sys.stdin.readlines()]
    files = [f for f in files if f]

  if not files:
    raise Error('Nothing to upload')

  files = [f.decode('utf-8') for f in files]
  blacklist = tools.gen_blacklist(blacklist)
  with get_storage(out, namespace) as storage:
    results = archive_files_to_storage(storage, files, blacklist)
    print('\n'.join('%s %s' % (r[0], r[1]) for r in results))
2126
2127
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  add_isolate_server_options(parser, False)
  parser.add_option(
      '--blacklist',
      action='append', default=list(DEFAULT_BLACKLIST),
      help='List of regexp to use as blacklist filter when uploading '
           'directories')
  options, files = parser.parse_args(args)
  process_isolate_server_options(parser, options)
  server = options.isolate_server
  # Authenticate up front when the destination is a real server, not a local
  # directory.
  if file_path.is_url(server):
    auth.ensure_logged_in(server)
  try:
    archive(server, options.namespace, files, options.blacklist)
  except Error as e:
    # Surface the failure through the option parser for a clean exit.
    parser.error(e.args[0])
  return 0
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002155
2156
def CMDdownload(parser, args):
  """Download data from the server.

  It can either download individual files or a complete tree from a .isolated
  file.
  """
  add_isolate_server_options(parser, True)
  parser.add_option(
      '-i', '--isolated', metavar='HASH',
      help='hash of an isolated file, .isolated file content is discarded, use '
           '--file if you need it')
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default=os.getcwd(),
      help='destination directory')
  options, args = parser.parse_args(args)
  process_isolate_server_options(parser, options)
  if args:
    parser.error('Unsupported arguments: %s' % args)
  # Exactly one of the two download modes must be selected.
  if bool(options.isolated) == bool(options.file):
    parser.error('Use one of --isolated or --file, and only one.')

  options.target = os.path.abspath(options.target)

  remote = options.isolate_server or options.indir
  if file_path.is_url(remote):
    auth.ensure_logged_in(remote)

  with get_storage(remote, options.namespace) as storage:
    # Fetching individual files.
    if options.file:
      channel = threading_utils.TaskChannel()
      pending = {}
      for digest, dest in options.file:
        pending[digest] = dest
        storage.async_fetch(
            channel,
            WorkerPool.MED,
            digest,
            UNKNOWN_FILE_SIZE,
            functools.partial(file_write, os.path.join(options.target, dest)))
      while pending:
        fetched = channel.pull()
        dest = pending.pop(fetched)
        logging.info('%s: %s', fetched, dest)

    # Fetching whole isolated tree.
    if options.isolated:
      settings = fetch_isolated(
          isolated_hash=options.isolated,
          storage=storage,
          cache=MemoryCache(),
          outdir=options.target,
          require_command=False)
      # |rel| is already absolute since options.target is. The previous code
      # joined it with options.target a second time, which was a no-op only
      # because os.path.join() discards everything before an absolute path.
      rel = os.path.join(options.target, settings.relative_cwd)
      print('To run this test please run from the directory %s:' % rel)
      print(' ' + ' '.join(settings.command))

  return 0
2219
2220
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDhashtable(parser, args):
  """Archives data to a hashtable on the file system.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  add_outdir_options(parser)
  parser.add_option(
      '--blacklist',
      action='append', default=list(DEFAULT_BLACKLIST),
      help='List of regexp to use as blacklist filter when uploading '
           'directories')
  options, files = parser.parse_args(args)
  process_outdir_options(parser, options, os.getcwd())
  try:
    # Archiving to the local file system is never compressed, hence the
    # 'default' (uncompressed) namespace.
    archive(options.outdir, 'default', files, options.blacklist)
  except Error as e:
    # Surface the failure through the option parser for a clean exit.
    parser.error(e.args[0])
  return 0
2247
2248
def add_isolate_server_options(parser, add_indir):
  """Registers -I/--isolate-server and --namespace on |parser|.

  When |add_indir| is True, also registers --indir so the command can target a
  local hashtable directory instead of a server.
  """
  default_server = os.environ.get('ISOLATE_SERVER', '')
  parser.add_option(
      '-I', '--isolate-server',
      metavar='URL', default=default_server,
      help='URL of the Isolate Server to use. Defaults to the environment '
           'variable ISOLATE_SERVER if set. No need to specify https://, this '
           'is assumed.')
  parser.add_option(
      '--namespace', default='default-gzip',
      help='The namespace to use on the Isolate Server, default: %default')
  if not add_indir:
    return
  parser.add_option(
      '--indir', metavar='DIR',
      help='Directory used to store the hashtable instead of using an '
           'isolate server.')
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002268
2269
def process_isolate_server_options(parser, options):
  """Processes the --isolate-server and --indir options and aborts if neither is
  specified.
  """
  indir_supported = hasattr(options, 'indir')
  if options.isolate_server:
    # Both destinations at once is ambiguous.
    if indir_supported and options.indir:
      parser.error('Use only one of --indir or --isolate-server.')
  else:
    if not indir_supported:
      parser.error('--isolate-server is required.')
    elif not options.indir:
      parser.error('Use one of --indir or --isolate-server.')

  if options.isolate_server:
    parts = urlparse.urlparse(options.isolate_server, 'https')
    if parts.query:
      parser.error('--isolate-server doesn\'t support query parameter.')
    if parts.fragment:
      parser.error('--isolate-server doesn\'t support fragment in the url.')
    # urlparse('foo.com') puts the host in path ('foo.com') and leaves netloc
    # empty; move it over to netloc in that case.
    pieces = list(parts)
    if not pieces[1] and pieces[2]:
      pieces[1] = pieces[2].rstrip('/')
      pieces[2] = ''
    pieces[2] = pieces[2].rstrip('/')
    options.isolate_server = urlparse.urlunparse(pieces)
    on_error.report_on_exception_exit(options.isolate_server)
    return

  if file_path.is_url(options.indir):
    parser.error('Can\'t use an URL for --indir.')
  # Normalize --indir to an absolute native path and require it to exist.
  options.indir = unicode(options.indir).replace('/', os.path.sep)
  options.indir = os.path.abspath(
      os.path.normpath(os.path.join(os.getcwd(), options.indir)))
  if not os.path.isdir(options.indir):
    parser.error('Path given to --indir must exist.')
2308
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002309
2310
def add_outdir_options(parser):
  """Registers the -o/--outdir option on |parser|.

  --outdir is orthogonal to --isolate-server: on upload, distinct commands
  ('archive' vs 'hashtable') pick one of them, while on download a single
  command can read from either an isolate server or a local file system tree.
  """
  parser.add_option(
      '-o', '--outdir', metavar='DIR',
      help='Directory used to recreate the tree.')
2321
2322
def process_outdir_options(parser, options, cwd):
  """Validates --outdir and rewrites it as a normalized absolute native path.

  Aborts via parser.error() when the flag is missing or looks like an URL.
  """
  if not options.outdir:
    parser.error('--outdir is required.')
  if file_path.is_url(options.outdir):
    parser.error('Can\'t use an URL for --outdir.')
  # Switch to the native path separator; native path case is not needed since
  # tracing is never done from outdir.
  native = unicode(options.outdir).replace('/', os.path.sep)
  options.outdir = os.path.abspath(
      os.path.normpath(os.path.join(cwd, native)))
  # In theory, we'd create the directory outdir right away. Defer doing it in
  # case there's errors in the command line.
2334
2335
class OptionParserIsolateServer(tools.OptionParserWithLogging):
  """Option parser shared by every command in this module.

  Extends the logging-aware base parser with the authentication options and
  validates them automatically as part of parse_args().
  """
  def __init__(self, **kwargs):
    # 'prog' is set to this file's basename so --version/--help report the
    # right executable name.
    tools.OptionParserWithLogging.__init__(
        self,
        version=__version__,
        prog=os.path.basename(sys.modules[__name__].__file__),
        **kwargs)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    # Let the base class parse first, then consume the auth options it does
    # not know about.
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    auth.process_auth_options(self, options)
    return options, args
2350
2351
def main(args):
  """Dispatches |args| to the matching CMD* function and returns its exit
  code."""
  parser = OptionParserIsolateServer()
  return subcommand.CommandDispatcher(__name__).execute(parser, args)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00002355
2356
if __name__ == '__main__':
  # Normalize stdout/stderr encoding and buffering, and enable ANSI colors on
  # Windows, before dispatching to the subcommand.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))