blob: 68008986bc61de8c9c5732d442b50a709408b45a [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00005
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05006"""Archives a set of files or directories to a server."""
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00007
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -04008__version__ = '0.3.4'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +000010import functools
maruel@chromium.org41601642013-09-18 19:40:46 +000011import json
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000012import logging
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000013import os
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000014import re
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050015import shutil
16import stat
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000017import sys
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -050018import tempfile
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000019import threading
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000020import time
maruel@chromium.orge82112e2013-04-24 14:41:55 +000021import urllib
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -050022import urlparse
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000023import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000024
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000025from third_party import colorama
26from third_party.depot_tools import fix_encoding
27from third_party.depot_tools import subcommand
28
Marc-Antoine Ruel37989932013-11-19 16:28:08 -050029from utils import file_path
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000030from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040031from utils import on_error
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000032from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000033from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000034
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080035import auth
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040036import isolated_format
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080037
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -040038# TODO(maruel): Temporary to make the next code migration simpler.
39from isolated_format import IsolatedError, MappingError, UNKNOWN_FILE_SIZE
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000040
# Version of isolate protocol passed to the server in /handshake request.
ISOLATE_PROTOCOL_VERSION = '1.0'


# The number of files to check the isolate server per /pre-upload query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: larger the file -> larger the
# possibility it has changed). Then first ITEMS_PER_CONTAINS_QUERIES[0] files
# are taken and send to '/pre-upload', then next ITEMS_PER_CONTAINS_QUERIES[1],
# and so on. Numbers here is a trade-off; the more per request, the lower the
# effect of HTTP round trip latency and TCP-level chattiness. On the other hand,
# larger values cause longer lookups, increasing the initial latency to start
# uploading, which is especially an issue for large files. This value is
# optimized for the "few thousands files to look up with minimal number of large
# files missing" case.
ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]


# A list of already compressed extension types that should not receive any
# compression before being uploaded. Entries are dot-less lowercase extensions.
ALREADY_COMPRESSED_TYPES = [
    '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'mp4', 'pdf',
    'png', 'wav', 'zip',
]


# Chunk size to use when reading from network stream.
NET_IO_FILE_CHUNK = 16 * 1024


# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout whole download will be aborted.
DOWNLOAD_READ_TIMEOUT = 60

# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30


# Regexps (anchored with ^...$) of relative paths that should not be archived.
DEFAULT_BLACKLIST = (
  # Temporary vim or python files.
  r'^.+\.(?:pyc|swp)$',
  # .git or .svn directory.
  r'^(?:.+' + re.escape(os.path.sep) + r'|)\.(?:git|svn)$',
)


# Chromium-specific.
DEFAULT_BLACKLIST += (
  r'^.+\.(?:run_test_cases)$',
  r'^(?:.+' + re.escape(os.path.sep) + r'|)testserver\.log$',
)
100
101
class Error(Exception):
  """Base exception type for generic runtime failures in this module."""
105
106
def stream_read(stream, chunk_size):
  """Yields successive chunks of at most |chunk_size| read from |stream|.

  Stops at the first empty read (end of stream).
  """
  data = stream.read(chunk_size)
  while data:
    yield data
    data = stream.read(chunk_size)
114
115
def file_read(filepath, chunk_size=isolated_format.DISK_FILE_CHUNK, offset=0):
  """Yields the content of |filepath| as byte chunks.

  Reads at most |chunk_size| bytes per chunk, starting at byte |offset|.
  """
  with open(filepath, 'rb') as stream:
    if offset:
      stream.seek(offset)
    chunk = stream.read(chunk_size)
    while chunk:
      yield chunk
      chunk = stream.read(chunk_size)
126
127
def file_write(filepath, content_generator):
  """Writes the chunks produced by |content_generator| to |filepath|.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  parent_dir = os.path.dirname(filepath)
  if not os.path.isdir(parent_dir):
    os.makedirs(parent_dir)
  written = 0
  with open(filepath, 'wb') as out:
    for chunk in content_generator:
      written += len(chunk)
      out.write(chunk)
  return written
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000146
147
def zip_compress(content_generator, level=7):
  """Yields zlib-compressed chunks for the data read from |content_generator|.

  |level| is the zlib compression level (0 = store, 9 = best compression).
  """
  compressor = zlib.compressobj(level)
  for piece in content_generator:
    out = compressor.compress(piece)
    if out:
      yield out
  final = compressor.flush(zlib.Z_FINISH)
  if final:
    yield final
158
159
def zip_decompress(
    content_generator, chunk_size=isolated_format.DISK_FILE_CHUNK):
  """Reads zipped data from |content_generator| and yields decompressed data.

  Decompresses in small chunks (no larger than |chunk_size|) so that a
  zip bomb doesn't cause zlib to preallocate a huge amount of memory.

  Raises IOError if data is corrupted or incomplete.
  """
  decompressor = zlib.decompressobj()
  bytes_read = 0
  try:
    for piece in content_generator:
      bytes_read += len(piece)
      out = decompressor.decompress(piece, chunk_size)
      if out:
        yield out
      # Drain whatever zlib kept aside because of the max_length limit.
      while decompressor.unconsumed_tail:
        out = decompressor.decompress(decompressor.unconsumed_tail, chunk_size)
        if out:
          yield out
    trailing = decompressor.flush()
    if trailing:
      yield trailing
  except zlib.error as e:
    raise IOError(
        'Corrupted zip stream (read %d bytes) - %s' % (bytes_read, e))
  # Ensure all data was read and decompressed.
  if decompressor.unused_data or decompressor.unconsumed_tail:
    raise IOError('Not all data was decompressed')
190
191
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (store, no compression) for file types that are already
  compressed, 7 otherwise.
  """
  # os.path.splitext() keeps the leading dot ('.png'); strip it so the value
  # can actually match the dot-less entries in ALREADY_COMPRESSED_TYPES.
  # Without the strip the membership test below never succeeds.
  file_ext = os.path.splitext(filename)[1].lower().lstrip('.')
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
197
198
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files."""
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect every ancestor directory of every file.
  to_create = set()
  for filepath in files:
    parent = os.path.dirname(filepath)
    while parent and parent not in to_create:
      to_create.add(parent)
      parent = os.path.dirname(parent)
  # Lexicographic order guarantees parents are created before children.
  for directory in sorted(to_create):
    os.mkdir(os.path.join(base_directory, directory))
211
212
def create_symlinks(base_directory, files):
  """Creates the symlinks described by the given set of files.

  A symlink is any (filepath, properties) pair whose properties contain an
  'l' key holding the link target. On Windows symlinks are skipped with a
  warning.
  """
  links = ((path, props['l']) for path, props in files if 'l' in props)
  for filepath, target in links:
    if sys.platform == 'win32':
      # TODO(maruel): Create symlink via the win32 api.
      logging.warning('Ignoring symlink %s', filepath)
      continue
    destination = os.path.join(base_directory, filepath)
    # os.symlink() doesn't exist on Windows.
    os.symlink(target, destination)  # pylint: disable=E1101
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000225
226
def is_valid_file(filepath, size):
  """Determines if the given files appears valid.

  Currently it just checks the file's size: returns True when the on-disk
  size equals |size|. When |size| is UNKNOWN_FILE_SIZE, only existence is
  checked.
  """
  if size == isolated_format.UNKNOWN_FILE_SIZE:
    return os.path.isfile(filepath)
  actual_size = os.stat(filepath).st_size
  if size == actual_size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), actual_size, size)
  return False
241
242
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """Thread pool that automatically retries on IOError and runs a preconfigured
  function.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Number of automatic retries on IOError before a task is failed.
  RETRIES = 5

  def __init__(self):
    # NOTE(review): argument order assumed to be (retried exceptions, retries,
    # initial workers, max workers, queue size, thread name prefix) — confirm
    # against threading_utils.AutoRetryThreadPool.__init__.
    super(WorkerPool, self).__init__(
        [IOError],
        self.RETRIES,
        self.INITIAL_WORKERS,
        self.MAX_WORKERS,
        0,
        'remote')
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000260
261
class Item(object):
  """An item to push to Storage.

  Its digest and size may be provided in advance, if known. Otherwise they will
  be derived from content(). If digest is provided, it MUST correspond to
  hash algorithm used by Storage.

  When used with Storage, Item starts its life in a main thread, travels
  to 'contains' thread, then to 'push' thread and then finally back to
  the main thread. It is never used concurrently from multiple threads.
  """

  def __init__(self, digest=None, size=None, high_priority=False):
    self.digest = digest
    self.size = size
    self.high_priority = high_priority
    self.compression_level = 6

  def content(self):
    """Iterable with content of this item as byte string (str) chunks."""
    raise NotImplementedError()

  def prepare(self, hash_algo):
    """Ensures self.digest and self.size are set.

    Uses content() as a source of data to calculate them. Does nothing if
    both are already known.

    Arguments:
      hash_algo: hash algorithm to use to calculate digest.
    """
    if self.digest is not None and self.size is not None:
      return
    hasher = hash_algo()
    total = 0
    for chunk in self.content():
      hasher.update(chunk)
      total += len(chunk)
    self.digest = hasher.hexdigest()
    self.size = total
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000301
302
303class FileItem(Item):
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800304 """A file to push to Storage.
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000305
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800306 Its digest and size may be provided in advance, if known. Otherwise they will
307 be derived from the file content.
308 """
309
310 def __init__(self, path, digest=None, size=None, high_priority=False):
311 super(FileItem, self).__init__(
312 digest,
313 size if size is not None else os.stat(path).st_size,
314 high_priority)
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +0000315 self.path = path
316 self.compression_level = get_zip_compression_level(path)
317
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800318 def content(self):
319 return file_read(self.path)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000320
321
class BufferItem(Item):
  """An in-memory byte buffer to push to Storage.

  The size is known up-front (len(buf)); the digest is computed lazily by
  prepare().
  """

  def __init__(self, buf, high_priority=False):
    self.buffer = buf
    super(BufferItem, self).__init__(None, len(buf), high_priority)

  def content(self):
    return [self.buffer]
331
332
class Storage(object):
  """Efficiently downloads or uploads large set of files via StorageApi.

  Implements compression support, parallel 'contains' checks, parallel uploads
  and more.

  Works only within single namespace (and thus hashing algorithm and compression
  scheme are fixed).

  Spawns multiple internal threads. Thread safe, but not fork safe.
  """

  def __init__(self, storage_api):
    self._storage_api = storage_api
    # Whether uploaded/downloaded content is zlib-compressed; derived from the
    # namespace name (see is_namespace_with_compression).
    self._use_zip = is_namespace_with_compression(storage_api.namespace)
    self._hash_algo = isolated_format.get_hash_algo(storage_api.namespace)
    # Thread pools are created lazily by the properties below.
    self._cpu_thread_pool = None
    self._net_thread_pool = None

  @property
  def hash_algo(self):
    """Hashing algorithm used to name files in storage based on their content.

    Defined by |namespace|. See also isolated_format.get_hash_algo().
    """
    return self._hash_algo

  @property
  def location(self):
    """Location of a backing store that this class is using.

    Exact meaning depends on the storage_api type. For IsolateServer it is
    an URL of isolate server, for FileSystem is it a path in file system.
    """
    return self._storage_api.location

  @property
  def namespace(self):
    """Isolate namespace used by this storage.

    Indirectly defines hashing scheme and compression method used.
    """
    return self._storage_api.namespace

  @property
  def cpu_thread_pool(self):
    """ThreadPool for CPU-bound tasks like zipping. Created on first use."""
    if self._cpu_thread_pool is None:
      self._cpu_thread_pool = threading_utils.ThreadPool(
          2, max(threading_utils.num_processors(), 2), 0, 'zip')
    return self._cpu_thread_pool

  @property
  def net_thread_pool(self):
    """AutoRetryThreadPool for IO-bound tasks, retries IOError."""
    if self._net_thread_pool is None:
      self._net_thread_pool = WorkerPool()
    return self._net_thread_pool

  def close(self):
    """Waits for all pending tasks to finish, then tears down both pools."""
    if self._cpu_thread_pool:
      self._cpu_thread_pool.join()
      self._cpu_thread_pool.close()
      self._cpu_thread_pool = None
    if self._net_thread_pool:
      self._net_thread_pool.join()
      self._net_thread_pool.close()
      self._net_thread_pool = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exc_value, _traceback):
    """Context manager interface."""
    self.close()
    # Returning False lets any in-flight exception propagate.
    return False

  def upload_items(self, items):
    """Uploads a bunch of items to the isolate server.

    It figures out what items are missing from the server and uploads only them.

    Arguments:
      items: list of Item instances that represents data to upload.

    Returns:
      List of items that were uploaded. All other items are already there.
    """
    # TODO(vadimsh): Optimize special case of len(items) == 1 that is frequently
    # used by swarming.py. There's no need to spawn multiple threads and try to
    # do stuff in parallel: there's nothing to parallelize. 'contains' check and
    # 'push' should be performed sequentially in the context of current thread.

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # For each digest keep only first Item that matches it. All other items
    # are just indistinguishable copies from the point of view of isolate
    # server (it doesn't care about paths at all, only content and digests).
    seen = {}
    duplicates = 0
    for item in items:
      if seen.setdefault(item.digest, item) is not item:
        duplicates += 1
    items = seen.values()
    if duplicates:
      logging.info('Skipped %d duplicated files', duplicates)

    # Enqueue all upload tasks.
    missing = set()
    uploaded = []
    channel = threading_utils.TaskChannel()
    for missing_item, push_state in self.get_missing_items(items):
      missing.add(missing_item)
      self.async_push(channel, missing_item, push_state)

    # No need to spawn deadlock detector thread if there's nothing to upload.
    if missing:
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        # Wait for all started uploads to finish.
        while len(uploaded) != len(missing):
          detector.ping()
          item = channel.pull()
          uploaded.append(item)
          logging.debug(
              'Uploaded %d / %d: %s', len(uploaded), len(missing), item.digest)
      logging.info('All files are uploaded')

    # Print stats.
    # NOTE(review): an empty |items| list makes total == 0 and the percentage
    # computations below raise ZeroDivisionError — confirm callers never pass
    # an empty list.
    total = len(items)
    total_size = sum(f.size for f in items)
    logging.info(
        'Total: %6d, %9.1fkb',
        total,
        total_size / 1024.)
    cache_hit = set(items) - missing
    cache_hit_size = sum(f.size for f in cache_hit)
    logging.info(
        'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_hit),
        cache_hit_size / 1024.,
        len(cache_hit) * 100. / total,
        cache_hit_size * 100. / total_size if total_size else 0)
    cache_miss = missing
    cache_miss_size = sum(f.size for f in cache_miss)
    logging.info(
        'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
        len(cache_miss),
        cache_miss_size / 1024.,
        len(cache_miss) * 100. / total,
        cache_miss_size * 100. / total_size if total_size else 0)

    return uploaded

  def get_fetch_url(self, item):
    """Returns an URL that can be used to fetch given item once it's uploaded.

    Note that if namespace uses compression, data at given URL is compressed.

    Arguments:
      item: Item to get fetch URL for.

    Returns:
      An URL or None if underlying protocol doesn't support this.
    """
    item.prepare(self._hash_algo)
    return self._storage_api.get_fetch_url(item.digest)

  def async_push(self, channel, item, push_state):
    """Starts asynchronous push to the server in a parallel thread.

    Can be used only after |item| was checked for presence on a server with
    'get_missing_items' call. 'get_missing_items' returns |push_state| object
    that contains storage specific information describing how to upload
    the item (for example in case of cloud storage, it is signed upload URLs).

    Arguments:
      channel: TaskChannel that receives back |item| when upload ends.
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      None, but |channel| later receives back |item| when upload ends.
    """
    # Thread pool task priority.
    priority = WorkerPool.HIGH if item.high_priority else WorkerPool.MED

    def push(content):
      """Pushes an Item and returns it to |channel|."""
      item.prepare(self._hash_algo)
      self._storage_api.push(item, push_state, content)
      return item

    # If zipping is not required, just start a push task.
    if not self._use_zip:
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, item.content())
      return

    # If zipping is enabled, zip in a separate thread.
    def zip_and_push():
      # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
      # content right here. It will block until all file is zipped.
      try:
        stream = zip_compress(item.content(), item.compression_level)
        data = ''.join(stream)
      except Exception as exc:
        logging.error('Failed to zip \'%s\': %s', item, exc)
        # Forward the failure to whoever is pulling from |channel|.
        channel.send_exception()
        return
      self.net_thread_pool.add_task_with_channel(
          channel, priority, push, [data])
    self.cpu_thread_pool.add_task(priority, zip_and_push)

  def push(self, item, push_state):
    """Synchronously pushes a single item to the server.

    If you need to push many items at once, consider using 'upload_items' or
    'async_push' with instance of TaskChannel.

    Arguments:
      item: item to upload as instance of Item class.
      push_state: push state returned by 'get_missing_items' call for |item|.

    Returns:
      Pushed item (same object as |item|).
    """
    channel = threading_utils.TaskChannel()
    with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT):
      self.async_push(channel, item, push_state)
      pushed = channel.pull()
      assert pushed is item
    return item

  def async_fetch(self, channel, priority, digest, size, sink):
    """Starts asynchronous fetch from the server in a parallel thread.

    Arguments:
      channel: TaskChannel that receives back |digest| when download ends.
      priority: thread pool task priority for the fetch.
      digest: hex digest of an item to download.
      size: expected size of the item (after decompression).
      sink: function that will be called as sink(generator).
    """
    def fetch():
      try:
        # Prepare reading pipeline.
        stream = self._storage_api.fetch(digest)
        if self._use_zip:
          stream = zip_decompress(stream, isolated_format.DISK_FILE_CHUNK)
        # Run |stream| through verifier that will assert its size.
        verifier = FetchStreamVerifier(stream, size)
        # Verified stream goes to |sink|.
        sink(verifier.run())
      except Exception as err:
        logging.error('Failed to fetch %s: %s', digest, err)
        raise
      return digest

    # Don't bother with zip_thread_pool for decompression. Decompression is
    # really fast and most probably IO bound anyway.
    self.net_thread_pool.add_task_with_channel(channel, priority, fetch)

  def get_missing_items(self, items):
    """Yields items that are missing from the server.

    Issues multiple parallel queries via StorageApi's 'contains' method.

    Arguments:
      items: a list of Item objects to check.

    Yields:
      For each missing item it yields a pair (item, push_state), where:
      * item - Item object that is missing (one of |items|).
      * push_state - opaque object that contains storage specific information
          describing how to upload the item (for example in case of cloud
          storage, it is signed upload URLs). It can later be passed to
          'async_push'.
    """
    channel = threading_utils.TaskChannel()
    pending = 0

    # Ensure all digests are calculated.
    for item in items:
      item.prepare(self._hash_algo)

    # Enqueue all requests.
    for batch in batch_items_for_check(items):
      self.net_thread_pool.add_task_with_channel(channel, WorkerPool.HIGH,
          self._storage_api.contains, batch)
      pending += 1

    # Yield results as they come in.
    for _ in xrange(pending):
      for missing_item, push_state in channel.pull().iteritems():
        yield missing_item, push_state
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000632
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000633
def batch_items_for_check(items, batch_size_limits=None):
  """Splits list of items to check for existence on the server into batches.

  Each batch corresponds to a single 'exists?' query to the server via a call
  to StorageApi's 'contains' method.

  Arguments:
    items: a list of Item objects.
    batch_size_limits: optional list of per-batch size caps; batch N is capped
        at batch_size_limits[min(N, len(batch_size_limits) - 1)], i.e. the
        last entry is reused once the list is exhausted. Defaults to the
        module-level ITEMS_PER_CONTAINS_QUERIES. Exposed as a parameter to
        generalize the schedule and to simplify testing.

  Yields:
    Batches of items to query for existence in a single operation,
    each batch is a list of Item objects.
  """
  if batch_size_limits is None:
    batch_size_limits = ITEMS_PER_CONTAINS_QUERIES
  batch_count = 0
  batch_size_limit = batch_size_limits[0]
  next_queries = []
  # Sort by size, largest first; presumably so the biggest items are checked
  # (and can start uploading) first -- TODO confirm intent.
  for item in sorted(items, key=lambda x: x.size, reverse=True):
    next_queries.append(item)
    if len(next_queries) == batch_size_limit:
      yield next_queries
      next_queries = []
      batch_count += 1
      batch_size_limit = batch_size_limits[
          min(batch_count, len(batch_size_limits) - 1)]
  # Flush the final, possibly short, batch.
  if next_queries:
    yield next_queries
vadimsh@chromium.org35122be2013-09-19 02:48:00 +0000660
661
class FetchQueue(object):
  """Fetches items from Storage and places them into LocalCache.

  It manages multiple concurrent fetch operations. Acts as a bridge between
  Storage and LocalCache so that Storage and LocalCache don't depend on each
  other at all.
  """

  def __init__(self, storage, cache):
    # Storage instance used to start asynchronous downloads.
    self.storage = storage
    # LocalCache instance that receives the downloaded data.
    self.cache = cache
    # Channel on which completed fetches are reported back (as digests).
    self._channel = threading_utils.TaskChannel()
    # Digests currently being fetched.
    self._pending = set()
    # Every digest ever requested via 'add'; used by verify_all_cached.
    self._accessed = set()
    # Digests already present in the cache (preexisting or fetched).
    self._fetched = cache.cached_set()

  def add(self, digest, size=UNKNOWN_FILE_SIZE, priority=WorkerPool.MED):
    """Starts asynchronous fetch of item |digest|."""
    # Fetching it now?
    if digest in self._pending:
      return

    # Mark this file as in use, verify_all_cached will later ensure it is still
    # in cache.
    self._accessed.add(digest)

    # Already fetched? Notify cache to update item's LRU position.
    if digest in self._fetched:
      # 'touch' returns True if item is in cache and not corrupted.
      if self.cache.touch(digest, size):
        return
      # Item is corrupted, remove it from cache and fetch it again.
      self._fetched.remove(digest)
      self.cache.evict(digest)

    # TODO(maruel): It should look at the free disk space, the current cache
    # size and the size of the new item on every new item:
    # - Trim the cache as more entries are listed when free disk space is low,
    #   otherwise if the amount of data downloaded during the run > free disk
    #   space, it'll crash.
    # - Make sure there's enough free disk space to fit all dependencies of
    #   this run! If not, abort early.

    # Start fetching: completed data is streamed straight into the cache via
    # the partial-bound cache.write callback.
    self._pending.add(digest)
    self.storage.async_fetch(
        self._channel, priority, digest, size,
        functools.partial(self.cache.write, digest))

  def wait(self, digests):
    """Starts a loop that waits for at least one of |digests| to be retrieved.

    Returns the first digest retrieved.
    """
    # Flush any already fetched items.
    for digest in digests:
      if digest in self._fetched:
        return digest

    # Ensure all requested items are being fetched now.
    assert all(digest in self._pending for digest in digests), (
        digests, self._pending)

    # Wait for some requested item to finish fetching. Fetches for digests
    # not in |digests| are still recorded as done so later waits see them.
    while self._pending:
      digest = self._channel.pull()
      self._pending.remove(digest)
      self._fetched.add(digest)
      if digest in digests:
        return digest

    # Should never reach this point due to assert above.
    raise RuntimeError('Impossible state')

  def inject_local_file(self, path, algo):
    """Adds local file to the cache as if it was fetched from storage.

    Arguments:
      path: path of the local file to read.
      algo: hashing constructor (e.g. hashlib.sha1) used to derive the digest.

    Returns:
      Digest of the injected file.
    """
    with open(path, 'rb') as f:
      data = f.read()
    digest = algo(data).hexdigest()
    self.cache.write(digest, [data])
    self._fetched.add(digest)
    return digest

  @property
  def pending_count(self):
    """Returns number of items to be fetched."""
    return len(self._pending)

  def verify_all_cached(self):
    """True if all accessed items are in cache."""
    return self._accessed.issubset(self.cache.cached_set())
753
754
class FetchStreamVerifier(object):
  """Wraps a fetched chunk stream and validates it before it reaches the
  LocalCache."""

  def __init__(self, stream, expected_size):
    # Iterable of str chunks being verified.
    self.stream = stream
    # Declared item size; checked when the stream ends.
    self.expected_size = expected_size
    # Total number of bytes observed so far.
    self.current_size = 0

  def run(self):
    """Generator that re-yields the chunks of |stream|.

    Holds one chunk back so the whole stream can be validated as complete
    before the consumer receives the final piece. An IOError raised by the
    consumer (thrown into the generator) is converted into MappingError so
    that Storage does not retry the fetch on unrelated local cache errors.
    """
    held = None
    for piece in self.stream:
      assert piece is not None
      if held is not None:
        self._inspect_chunk(held, is_last=False)
        try:
          yield held
        except IOError as exc:
          raise MappingError('Failed to store an item in cache: %s' % exc)
      held = piece
    if held is not None:
      self._inspect_chunk(held, is_last=True)
      try:
        yield held
      except IOError as exc:
        raise MappingError('Failed to store an item in cache: %s' % exc)

  def _inspect_chunk(self, chunk, is_last):
    """Accounts for a chunk; raises IOError on size mismatch at stream end."""
    self.current_size += len(chunk)
    if not is_last:
      return
    if self.expected_size == isolated_format.UNKNOWN_FILE_SIZE:
      return
    if self.expected_size != self.current_size:
      raise IOError('Incorrect file size: expected %d, got %d' % (
          self.expected_size, self.current_size))
799
800
class StorageApi(object):
  """Base interface for low-level storage backends.

  A StorageApi deals only with raw bytes: compression and hashing details are
  handled by the higher level Storage class. Clients should generally prefer
  Storage over a StorageApi subclass, since Storage layers compression and
  upload optimizations on top of this interface.
  """

  @property
  def location(self):
    """Where the backing store lives.

    The exact meaning depends on the implementation: an URL of the isolate
    server for IsolateServer, a file system path for FileSystem.
    """
    raise NotImplementedError()

  @property
  def namespace(self):
    """Isolate namespace this storage operates in.

    The namespace indirectly selects the hashing scheme and compression
    method used.
    """
    raise NotImplementedError()

  def get_fetch_url(self, digest):
    """Returns an URL usable to fetch the item with the given digest.

    Arguments:
      digest: hex digest of item to fetch.

    Returns:
      An URL, or None if the protocol has no such URLs.
    """
    raise NotImplementedError()

  def fetch(self, digest, offset=0):
    """Downloads an object, yielding its content.

    Arguments:
      digest: hash digest of item to download.
      offset: offset (in bytes) from the start of the file to resume fetch
          from.

    Yields:
      Chunks of the downloaded item (as str objects).
    """
    raise NotImplementedError()

  def push(self, item, push_state, content=None):
    """Uploads |item| with content produced by the |content| generator.

    |item| MUST have gone through a 'contains' call first to obtain
    |push_state| before it can be pushed to the storage.

    To be clear, here is one possible usage:
      all_items = [... all items to push as Item subclasses ...]
      for missing_item, push_state in storage_api.contains(all_items).items():
        storage_api.push(missing_item, push_state)

    When pushing to a namespace with compression, the bytes to push differ
    from the bytes the item provides. In that case |content| is not None and
    yields chunks of compressed data (using item.content() as the source of
    original uncompressed data). This is implemented by the Storage class.

    Arguments:
      item: Item object that holds information about an item being pushed.
      push_state: push state object as returned by 'contains' call.
      content: a generator that yields chunks to push, item.content() if None.

    Returns:
      None.
    """
    raise NotImplementedError()

  def contains(self, items):
    """Checks which of |items| are on the server, prepares missing uploads.

    Arguments:
      items: list of Item objects to check for presence.

    Returns:
      A dict mapping each missing Item to an opaque push state object to be
      passed to 'push'. See doc string for 'push'.
    """
    raise NotImplementedError()
889
890
Vadim Shtayurabcff74f2014-02-27 16:19:34 -0800891class _IsolateServerPushState(object):
892 """Per-item state passed from IsolateServer.contains to IsolateServer.push.
Mike Frysinger27f03da2014-02-12 16:47:01 -0500893
894 Note this needs to be a global class to support pickling.
895 """
896
897 def __init__(self, upload_url, finalize_url):
898 self.upload_url = upload_url
899 self.finalize_url = finalize_url
900 self.uploaded = False
901 self.finalized = False
902
903
class IsolateServer(StorageApi):
  """StorageApi implementation that downloads and uploads to Isolate Server.

  It uploads and downloads directly from Google Storage whenever appropriate.
  Works only within single namespace.
  """

  def __init__(self, base_url, namespace):
    super(IsolateServer, self).__init__()
    assert base_url.startswith('http'), base_url
    self._base_url = base_url.rstrip('/')
    self._namespace = namespace
    # Guards lazy initialization of self._server_caps.
    self._lock = threading.Lock()
    # Handshake response, fetched lazily by _server_capabilities.
    self._server_caps = None

  @staticmethod
  def _generate_handshake_request():
    """Returns a dict to be sent as handshake request body."""
    # TODO(vadimsh): Set 'pusher' and 'fetcher' according to intended usage.
    return {
        'client_app_version': __version__,
        'fetcher': True,
        'protocol_version': ISOLATE_PROTOCOL_VERSION,
        'pusher': True,
    }

  @staticmethod
  def _validate_handshake_response(caps):
    """Validates and normalizes handshake response."""
    logging.info('Protocol version: %s', caps['protocol_version'])
    logging.info('Server version: %s', caps['server_app_version'])
    if caps.get('error'):
      raise MappingError(caps['error'])
    if not caps['access_token']:
      raise ValueError('access_token is missing')
    return caps

  @property
  def _server_capabilities(self):
    """Performs handshake with the server if not yet done.

    Returns:
      Server capabilities dictionary as returned by /handshake endpoint.

    Raises:
      MappingError if server rejects the handshake.
    """
    # TODO(maruel): Make this request much earlier asynchronously while the
    # files are being enumerated.

    # TODO(vadimsh): Put |namespace| in the URL so that server can apply
    # namespace-level ACLs to this call.
    with self._lock:
      if self._server_caps is None:
        try:
          caps = net.url_read_json(
              url=self._base_url + '/content-gs/handshake',
              data=self._generate_handshake_request())
          if caps is None:
            raise MappingError('Failed to perform handshake.')
          if not isinstance(caps, dict):
            raise ValueError('Expecting JSON dict')
          self._server_caps = self._validate_handshake_response(caps)
        except (ValueError, KeyError, TypeError) as exc:
          # KeyError exception has very confusing str conversion: it's just a
          # missing key value and nothing else. So print exception class name
          # as well.
          raise MappingError(
              'Invalid handshake response (%s): %s' % (
                  exc.__class__.__name__, exc))
      return self._server_caps

  @property
  def location(self):
    """See StorageApi.location: URL of the isolate server."""
    return self._base_url

  @property
  def namespace(self):
    """See StorageApi.namespace."""
    return self._namespace

  def get_fetch_url(self, digest):
    """See StorageApi.get_fetch_url."""
    assert isinstance(digest, basestring)
    return '%s/content-gs/retrieve/%s/%s' % (
        self._base_url, self._namespace, digest)

  def fetch(self, digest, offset=0):
    """See StorageApi.fetch. Supports resuming via HTTP Range requests."""
    source_url = self.get_fetch_url(digest)
    logging.debug('download_file(%s, %d)', source_url, offset)

    connection = net.url_open(
        source_url,
        read_timeout=DOWNLOAD_READ_TIMEOUT,
        headers={'Range': 'bytes=%d-' % offset} if offset else None)

    if not connection:
      raise IOError('Request failed - %s' % source_url)

    # If |offset| is used, verify server respects it by checking Content-Range.
    if offset:
      content_range = connection.get_header('Content-Range')
      if not content_range:
        raise IOError('Missing Content-Range header')

      # 'Content-Range' format is 'bytes <offset>-<last_byte_index>/<size>'.
      # According to a spec, <size> can be '*' meaning "Total size of the file
      # is not known in advance".
      try:
        match = re.match(r'bytes (\d+)-(\d+)/(\d+|\*)', content_range)
        if not match:
          raise ValueError()
        content_offset = int(match.group(1))
        last_byte_index = int(match.group(2))
        size = None if match.group(3) == '*' else int(match.group(3))
      except ValueError:
        raise IOError('Invalid Content-Range header: %s' % content_range)

      # Ensure returned offset equals requested one.
      if offset != content_offset:
        raise IOError('Expecting offset %d, got %d (Content-Range is %s)' % (
            offset, content_offset, content_range))

      # Ensure entire tail of the file is returned.
      if size is not None and last_byte_index + 1 != size:
        raise IOError('Incomplete response. Content-Range: %s' % content_range)

    return stream_read(connection, NET_IO_FILE_CHUNK)

  def push(self, item, push_state, content=None):
    """See StorageApi.push. Uploads body, then optionally finalizes it."""
    assert isinstance(item, Item)
    assert item.digest is not None
    assert item.size is not None
    assert isinstance(push_state, _IsolateServerPushState)
    assert not push_state.finalized

    # Default to item.content().
    content = item.content() if content is None else content

    # Do not iterate byte by byte over 'str'. Push it all as a single chunk.
    if isinstance(content, basestring):
      assert not isinstance(content, unicode), 'Unicode string is not allowed'
      content = [content]

    # TODO(vadimsh): Do not read from |content| generator when retrying push.
    # If |content| is indeed a generator, it can not be re-winded back
    # to the beginning of the stream. A retry will find it exhausted. A possible
    # solution is to wrap |content| generator with some sort of caching
    # restartable generator. It should be done alongside streaming support
    # implementation.

    # This push operation may be a retry after failed finalization call below,
    # no need to reupload contents in that case.
    if not push_state.uploaded:
      # A cheezy way to avoid memcpy of (possibly huge) file, until streaming
      # upload support is implemented.
      if isinstance(content, list) and len(content) == 1:
        content = content[0]
      else:
        content = ''.join(content)
      # PUT file to |upload_url|.
      response = net.url_read(
          url=push_state.upload_url,
          data=content,
          content_type='application/octet-stream',
          method='PUT')
      if response is None:
        raise IOError('Failed to upload a file %s to %s' % (
            item.digest, push_state.upload_url))
      push_state.uploaded = True
    else:
      logging.info(
          'A file %s already uploaded, retrying finalization only', item.digest)

    # Optionally notify the server that it's done.
    if push_state.finalize_url:
      # TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
      # send it to isolated server. That way isolate server can verify that
      # the data safely reached Google Storage (GS provides MD5 and CRC32C of
      # stored files).
      # TODO(maruel): Fix the server to accept propery data={} so
      # url_read_json() can be used.
      response = net.url_read(
          url=push_state.finalize_url,
          data='',
          content_type='application/json',
          method='POST')
      if response is None:
        raise IOError('Failed to finalize an upload of %s' % item.digest)
      push_state.finalized = True

  def contains(self, items):
    """See StorageApi.contains: single /pre-upload query for all |items|."""
    logging.info('Checking existence of %d files...', len(items))

    # Ensure all items were initialized with 'prepare' call. Storage does that.
    assert all(i.digest is not None and i.size is not None for i in items)

    # Request body is a json encoded list of dicts.
    body = [
        {
          'h': item.digest,
          's': item.size,
          'i': int(item.high_priority),
        } for item in items
    ]

    query_url = '%s/content-gs/pre-upload/%s?token=%s' % (
        self._base_url,
        self._namespace,
        urllib.quote(self._server_capabilities['access_token']))

    # Response body is a list of push_urls (or null if file is already present).
    response = None
    try:
      response = net.url_read_json(url=query_url, data=body)
      if response is None:
        raise MappingError('Failed to execute /pre-upload query')
      if not isinstance(response, list):
        raise ValueError('Expecting response with json-encoded list')
      if len(response) != len(items):
        raise ValueError(
            'Incorrect number of items in the list, expected %d, '
            'but got %d' % (len(items), len(response)))
    except ValueError as err:
      raise MappingError(
          'Invalid response from server: %s, body is %s' % (err, response))

    # Pick Items that are missing, attach _PushState to them.
    missing_items = {}
    for i, push_urls in enumerate(response):
      if push_urls:
        assert len(push_urls) == 2, str(push_urls)
        missing_items[items[i]] = _IsolateServerPushState(
            push_urls[0], push_urls[1])
    logging.info('Queried %d files, %d cache hit',
        len(items), len(items) - len(missing_items))
    return missing_items
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001139
1140
class FileSystem(StorageApi):
  """StorageApi that reads and writes items as plain files on disk.

  The typical setup is an NFS/CIFS file server mounted locally, used as the
  source when fetching files onto a local partition.
  """

  # Sentinel handed out by 'contains' and demanded back by 'push', so callers
  # cannot skip the 'contains' step; naively passing None to 'push' will not
  # work.
  _DUMMY_PUSH_STATE = object()

  def __init__(self, base_path, namespace):
    super(FileSystem, self).__init__()
    self._base_path = base_path
    self._namespace = namespace

  @property
  def location(self):
    """See StorageApi.location: root directory of the store."""
    return self._base_path

  @property
  def namespace(self):
    """See StorageApi.namespace."""
    return self._namespace

  def get_fetch_url(self, digest):
    # A plain file system store has no fetch URLs.
    return None

  def fetch(self, digest, offset=0):
    """See StorageApi.fetch: reads the file named after the digest."""
    assert isinstance(digest, basestring)
    source = os.path.join(self._base_path, digest)
    return file_read(source, offset=offset)

  def push(self, item, push_state, content=None):
    """See StorageApi.push: writes the item into a file named by its digest."""
    assert isinstance(item, Item)
    assert item.digest is not None
    assert item.size is not None
    assert push_state is self._DUMMY_PUSH_STATE
    if content is None:
      content = item.content()
    if isinstance(content, basestring):
      # Do not iterate a str byte by byte; treat it as a single chunk.
      assert not isinstance(content, unicode), 'Unicode string is not allowed'
      content = [content]
    file_write(os.path.join(self._base_path, item.digest), content)

  def contains(self, items):
    """See StorageApi.contains: an item exists iff its digest file exists."""
    assert all(i.digest is not None and i.size is not None for i in items)
    return {
        item: self._DUMMY_PUSH_STATE for item in items
        if not os.path.exists(os.path.join(self._base_path, item.digest))
    }
vadimsh@chromium.org35122be2013-09-19 02:48:00 +00001189
1190
class LocalCache(object):
  """Abstract local cache for objects fetched via Storage.

  May be accessed concurrently from multiple threads, so implementations
  should protect their internal state with some lock.
  """

  # Directory backing the cache, if any.
  cache_dir = None

  def __enter__(self):
    """Context manager interface."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Context manager interface."""
    return False

  def cached_set(self):
    """Returns a set of all cached digests (always a new object)."""
    raise NotImplementedError()

  def touch(self, digest, size):
    """Checks item integrity and refreshes its LRU position.

    Arguments:
      digest: hash digest of item to check.
      size: expected size of this item.

    Returns:
      True if item is in cache and not corrupted.
    """
    raise NotImplementedError()

  def evict(self, digest):
    """Removes item from cache if it's there."""
    raise NotImplementedError()

  def read(self, digest):
    """Returns contents of the cached item as a single str."""
    raise NotImplementedError()

  def write(self, digest, content):
    """Reads data from |content| generator and stores it in cache."""
    raise NotImplementedError()

  def hardlink(self, digest, dest, file_mode):
    """Ensures file at |dest| has same content as cached |digest|.

    If file_mode is provided, it is used to set the executable bit if
    applicable.
    """
    raise NotImplementedError()
1242
1243
1244class MemoryCache(LocalCache):
1245 """LocalCache implementation that stores everything in memory."""
1246
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001247 def __init__(self, file_mode_mask=0500):
1248 """Args:
1249 file_mode_mask: bit mask to AND file mode with. Default value will make
1250 all mapped files to be read only.
1251 """
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001252 super(MemoryCache, self).__init__()
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001253 self._file_mode_mask = file_mode_mask
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001254 # Let's not assume dict is thread safe.
1255 self._lock = threading.Lock()
1256 self._contents = {}
1257
1258 def cached_set(self):
1259 with self._lock:
1260 return set(self._contents)
1261
1262 def touch(self, digest, size):
1263 with self._lock:
1264 return digest in self._contents
1265
1266 def evict(self, digest):
1267 with self._lock:
1268 self._contents.pop(digest, None)
1269
1270 def read(self, digest):
1271 with self._lock:
1272 return self._contents[digest]
1273
1274 def write(self, digest, content):
1275 # Assemble whole stream before taking the lock.
1276 data = ''.join(content)
1277 with self._lock:
1278 self._contents[digest] = data
1279
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001280 def hardlink(self, digest, dest, file_mode):
1281 """Since data is kept in memory, there is no filenode to hardlink."""
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001282 file_write(dest, [self.read(digest)])
Marc-Antoine Ruelfb199cf2013-11-12 15:38:12 -05001283 if file_mode is not None:
Vadim Shtayurae3fbd102014-04-29 17:05:21 -07001284 os.chmod(dest, file_mode & self._file_mode_mask)
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001285
1286
def is_namespace_with_compression(namespace):
  """True if objects in the given |namespace| are stored compressed."""
  return namespace.endswith('-gzip') or namespace.endswith('-deflate')
1290
1291
def get_storage_api(file_or_url, namespace):
  """Returns an object that implements low-level StorageApi interface.

  It is used by Storage to work with a single isolate |namespace|. It should
  rarely be used directly by clients; see 'get_storage' for a better
  alternative.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of
        isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of StorageApi subclass.
  """
  storage_cls = IsolateServer if file_path.is_url(file_or_url) else FileSystem
  return storage_cls(file_or_url, namespace)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00001313
1314
def get_storage(file_or_url, namespace):
  """Returns Storage class that can upload and download from |namespace|.

  Arguments:
    file_or_url: a file path to use file system based storage, or URL of
        isolate service to use shared cloud based storage.
    namespace: isolate namespace to operate in, also defines hashing and
        compression scheme used, i.e. namespace names that end with '-gzip'
        store compressed data.

  Returns:
    Instance of Storage.
  """
  storage_api = get_storage_api(file_or_url, namespace)
  return Storage(storage_api)
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001329
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001330
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001331def expand_symlinks(indir, relfile):
1332 """Follows symlinks in |relfile|, but treating symlinks that point outside the
1333 build tree as if they were ordinary directories/files. Returns the final
1334 symlink-free target and a list of paths to symlinks encountered in the
1335 process.
1336
1337 The rule about symlinks outside the build tree is for the benefit of the
1338 Chromium OS ebuild, which symlinks the output directory to an unrelated path
1339 in the chroot.
1340
1341 Fails when a directory loop is detected, although in theory we could support
1342 that case.
1343 """
1344 is_directory = relfile.endswith(os.path.sep)
1345 done = indir
1346 todo = relfile.strip(os.path.sep)
1347 symlinks = []
1348
1349 while todo:
1350 pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
1351 done, todo)
1352 if not symlink:
1353 todo = file_path.fix_native_path_case(done, todo)
1354 done = os.path.join(done, todo)
1355 break
1356 symlink_path = os.path.join(done, pre_symlink, symlink)
1357 post_symlink = post_symlink.lstrip(os.path.sep)
1358 # readlink doesn't exist on Windows.
1359 # pylint: disable=E1101
1360 target = os.path.normpath(os.path.join(done, pre_symlink))
1361 symlink_target = os.readlink(symlink_path)
1362 if os.path.isabs(symlink_target):
1363 # Absolute path are considered a normal directories. The use case is
1364 # generally someone who puts the output directory on a separate drive.
1365 target = symlink_target
1366 else:
1367 # The symlink itself could be using the wrong path case.
1368 target = file_path.fix_native_path_case(target, symlink_target)
1369
1370 if not os.path.exists(target):
1371 raise MappingError(
1372 'Symlink target doesn\'t exist: %s -> %s' % (symlink_path, target))
1373 target = file_path.get_native_path_case(target)
1374 if not file_path.path_starts_with(indir, target):
1375 done = symlink_path
1376 todo = post_symlink
1377 continue
1378 if file_path.path_starts_with(target, symlink_path):
1379 raise MappingError(
1380 'Can\'t map recursive symlink reference %s -> %s' %
1381 (symlink_path, target))
1382 logging.info('Found symlink: %s -> %s', symlink_path, target)
1383 symlinks.append(os.path.relpath(symlink_path, indir))
1384 # Treat the common prefix of the old and new paths as done, and start
1385 # scanning again.
1386 target = target.split(os.path.sep)
1387 symlink_path = symlink_path.split(os.path.sep)
1388 prefix_length = 0
1389 for target_piece, symlink_path_piece in zip(target, symlink_path):
1390 if target_piece == symlink_path_piece:
1391 prefix_length += 1
1392 else:
1393 break
1394 done = os.path.sep.join(target[:prefix_length])
1395 todo = os.path.join(
1396 os.path.sep.join(target[prefix_length:]), post_symlink)
1397
1398 relfile = os.path.relpath(done, indir)
1399 relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
1400 return relfile, symlinks
1401
1402
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
  """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Arguments:
    indir: root directory of the tree.
    relfile: path relative to |indir|; a trailing os.path.sep marks a
        directory, which is expanded recursively.
    blacklist: optional callable taking an indir-relative path and returning
        True for entries to skip during directory expansion. Explicitly
        listed individual files are never blacklisted.
    follow_symlinks: when True, symlinks are resolved via expand_symlinks()
        and the encountered symlink paths are included in the result.

  Returns:
    List of indir-relative paths (symlinks first, then files).

  Raises:
    MappingError: on absolute paths, paths escaping |indir|, path case
        mismatches, missing files or unreadable directories.
  """
  if os.path.isabs(relfile):
    raise MappingError('Can\'t map absolute path %s' % relfile)

  # Normalize and ensure the path stays inside the build tree.
  infile = file_path.normpath(os.path.join(indir, relfile))
  if not infile.startswith(indir):
    raise MappingError('Can\'t map file %s outside %s' % (infile, indir))

  # Verify the path uses the exact on-disk case.
  filepath = os.path.join(indir, relfile)
  native_filepath = file_path.get_native_path_case(filepath)
  if filepath != native_filepath:
    # Special case './'.
    if filepath != native_filepath + '.' + os.path.sep:
      # While it'd be nice to enforce path casing on Windows, it's impractical.
      # Also give up enforcing strict path case on OSX. Really, it's that sad.
      # The case where it happens is very specific and hard to reproduce:
      # get_native_path_case(
      #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
      #    u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
      #
      # Note that this is really something deep in OSX because running
      #  ls Foo.framework/Versions/A
      # will print out 'Resources', while file_path.get_native_path_case()
      # returns a lower case 'r'.
      #
      # So *something* is happening under the hood resulting in the command 'ls'
      # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree.  We
      # have no idea why.
      if sys.platform not in ('darwin', 'win32'):
        raise MappingError(
            'File path doesn\'t equal native file path\n%s != %s' %
            (filepath, native_filepath))

  symlinks = []
  if follow_symlinks:
    # Resolve symlinks; |relfile| becomes the symlink-free target and
    # |symlinks| the traversed links (to be included in the output).
    relfile, symlinks = expand_symlinks(indir, relfile)

  if relfile.endswith(os.path.sep):
    # Directory: expand its content recursively.
    if not os.path.isdir(infile):
      raise MappingError(
          '%s is not a directory but ends with "%s"' % (infile, os.path.sep))

    # Special case './'.
    if relfile.startswith('.' + os.path.sep):
      relfile = relfile[2:]
    outfiles = symlinks
    try:
      for filename in os.listdir(infile):
        inner_relfile = os.path.join(relfile, filename)
        if blacklist and blacklist(inner_relfile):
          continue
        if os.path.isdir(os.path.join(indir, inner_relfile)):
          # Mark subdirectories with a trailing separator so the recursive
          # call expands them too.
          inner_relfile += os.path.sep
        outfiles.extend(
            expand_directory_and_symlink(indir, inner_relfile, blacklist,
                                         follow_symlinks))
      return outfiles
    except OSError as e:
      raise MappingError(
          'Unable to iterate over directory %s.\n%s' % (infile, e))
  else:
    # Always add individual files even if they were blacklisted.
    if os.path.isdir(infile):
      raise MappingError(
          'Input directory %s must have a trailing slash' % infile)

    if not os.path.isfile(infile):
      raise MappingError('Input file %s doesn\'t exist' % infile)

    return symlinks + [relfile]
1481
1482
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001483def process_input(filepath, prevdict, read_only, algo):
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001484 """Processes an input file, a dependency, and return meta data about it.
1485
1486 Behaviors:
1487 - Retrieves the file mode, file size, file timestamp, file link
1488 destination if it is a file link and calcultate the SHA-1 of the file's
1489 content if the path points to a file and not a symlink.
1490
1491 Arguments:
1492 filepath: File to act on.
1493 prevdict: the previous dictionary. It is used to retrieve the cached sha-1
1494 to skip recalculating the hash. Optional.
Marc-Antoine Ruel7124e392014-01-09 11:49:21 -05001495 read_only: If 1 or 2, the file mode is manipulated. In practice, only save
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001496 one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444 (r). On
1497 windows, mode is not set since all files are 'executable' by
1498 default.
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001499 algo: Hashing algorithm used.
1500
1501 Returns:
1502 The necessary data to create a entry in the 'files' section of an .isolated
1503 file.
1504 """
1505 out = {}
1506 # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
1507 # if prevdict.get('T') == True:
1508 # # The file's content is ignored. Skip the time and hard code mode.
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001509 # out['s'] = 0
1510 # out['h'] = algo().hexdigest()
1511 # out['T'] = True
1512 # return out
1513
1514 # Always check the file stat and check if it is a link. The timestamp is used
1515 # to know if the file's content/symlink destination should be looked into.
1516 # E.g. only reuse from prevdict if the timestamp hasn't changed.
1517 # There is the risk of the file's timestamp being reset to its last value
1518 # manually while its content changed. We don't protect against that use case.
1519 try:
1520 filestats = os.lstat(filepath)
1521 except OSError:
1522 # The file is not present.
1523 raise MappingError('%s is missing' % filepath)
1524 is_link = stat.S_ISLNK(filestats.st_mode)
1525
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001526 if sys.platform != 'win32':
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001527 # Ignore file mode on Windows since it's not really useful there.
1528 filemode = stat.S_IMODE(filestats.st_mode)
1529 # Remove write access for group and all access to 'others'.
1530 filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
1531 if read_only:
1532 filemode &= ~stat.S_IWUSR
1533 if filemode & stat.S_IXUSR:
1534 filemode |= stat.S_IXGRP
1535 else:
1536 filemode &= ~stat.S_IXGRP
1537 if not is_link:
1538 out['m'] = filemode
1539
1540 # Used to skip recalculating the hash or link destination. Use the most recent
1541 # update time.
1542 # TODO(maruel): Save it in the .state file instead of .isolated so the
1543 # .isolated file is deterministic.
1544 out['t'] = int(round(filestats.st_mtime))
1545
1546 if not is_link:
1547 out['s'] = filestats.st_size
1548 # If the timestamp wasn't updated and the file size is still the same, carry
1549 # on the sha-1.
1550 if (prevdict.get('t') == out['t'] and
1551 prevdict.get('s') == out['s']):
1552 # Reuse the previous hash if available.
1553 out['h'] = prevdict.get('h')
1554 if not out.get('h'):
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001555 out['h'] = isolated_format.hash_file(filepath, algo)
Marc-Antoine Ruelfcc3cd82013-11-19 16:31:38 -05001556 else:
1557 # If the timestamp wasn't updated, carry on the link destination.
1558 if prevdict.get('t') == out['t']:
1559 # Reuse the previous link destination if available.
1560 out['l'] = prevdict.get('l')
1561 if out.get('l') is None:
1562 # The link could be in an incorrect path case. In practice, this only
1563 # happen on OSX on case insensitive HFS.
1564 # TODO(maruel): It'd be better if it was only done once, in
1565 # expand_directory_and_symlink(), so it would not be necessary to do again
1566 # here.
1567 symlink_value = os.readlink(filepath) # pylint: disable=E1101
1568 filedir = file_path.get_native_path_case(os.path.dirname(filepath))
1569 native_dest = file_path.fix_native_path_case(filedir, symlink_value)
1570 out['l'] = os.path.relpath(native_dest, filedir)
1571 return out
1572
1573
def save_isolated(isolated, data):
  """Writes |data| out as the .isolated file |isolated|.

  Note: this reference implementation never splits the content into child
  .isolated files, so the returned list is always empty.

  Returns the list of child isolated files that are included by |isolated|.
  """
  # Round-trip |data| through the validator so malformed content is rejected
  # before anything hits the disk.
  hash_algo = isolated_format.SUPPORTED_ALGOS[data['algo']]
  load_isolated(json.dumps(data), hash_algo)
  tools.write_json(isolated, data, True)
  return []
1587
1588
maruel@chromium.org7b844a62013-09-17 13:04:59 +00001589def upload_tree(base_url, indir, infiles, namespace):
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001590 """Uploads the given tree to the given url.
1591
1592 Arguments:
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +00001593 base_url: The base url, it is assume that |base_url|/has/ can be used to
1594 query if an element was already uploaded, and |base_url|/store/
1595 can be used to upload a new element.
1596 indir: Root directory the infiles are based in.
vadimsh@chromium.orgbcb966b2013-10-01 18:14:18 +00001597 infiles: dict of files to upload from |indir| to |base_url|.
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +00001598 namespace: The namespace to use on the server.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001599 """
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001600 logging.info('upload_tree(indir=%s, files=%d)', indir, len(infiles))
1601
1602 # Convert |indir| + |infiles| into a list of FileItem objects.
1603 # Filter out symlinks, since they are not represented by items on isolate
1604 # server side.
1605 items = [
1606 FileItem(
1607 path=os.path.join(indir, filepath),
1608 digest=metadata['h'],
1609 size=metadata['s'],
1610 high_priority=metadata.get('priority') == '0')
1611 for filepath, metadata in infiles.iteritems()
1612 if 'l' not in metadata
1613 ]
1614
vadimsh@chromium.org7b5dae32013-10-03 16:59:59 +00001615 with get_storage(base_url, namespace) as storage:
Vadim Shtayurabcff74f2014-02-27 16:19:34 -08001616 storage.upload_items(items)
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +00001617 return 0
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001618
1619
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001620def load_isolated(content, algo):
maruel@chromium.org41601642013-09-18 19:40:46 +00001621 """Verifies the .isolated file is valid and loads this object with the json
1622 data.
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001623
1624 Arguments:
1625 - content: raw serialized content to load.
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001626 - algo: hashlib algorithm class. Used to confirm the algorithm matches the
1627 algorithm used on the Isolate Server.
maruel@chromium.org41601642013-09-18 19:40:46 +00001628 """
1629 try:
1630 data = json.loads(content)
1631 except ValueError:
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001632 raise IsolatedError('Failed to parse: %s...' % content[:100])
maruel@chromium.org41601642013-09-18 19:40:46 +00001633
1634 if not isinstance(data, dict):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001635 raise IsolatedError('Expected dict, got %r' % data)
maruel@chromium.org41601642013-09-18 19:40:46 +00001636
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001637 # Check 'version' first, since it could modify the parsing after.
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001638 value = data.get('version', '1.0')
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001639 if not isinstance(value, basestring):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001640 raise IsolatedError('Expected string, got %r' % value)
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001641 try:
1642 version = tuple(map(int, value.split('.')))
1643 except ValueError:
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001644 raise IsolatedError('Expected valid version, got %r' % value)
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001645
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001646 expected_version = tuple(
1647 map(int, isolated_format.ISOLATED_FILE_VERSION.split('.')))
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001648 # Major version must match.
1649 if version[0] != expected_version[0]:
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001650 raise IsolatedError(
Marc-Antoine Ruel1c1edd62013-12-06 09:13:13 -05001651 'Expected compatible \'%s\' version, got %r' %
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001652 (isolated_format.ISOLATED_FILE_VERSION, value))
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001653
1654 if algo is None:
Marc-Antoine Ruelac54cb42013-11-18 14:05:35 -05001655 # TODO(maruel): Remove the default around Jan 2014.
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001656 # Default the algorithm used in the .isolated file itself, falls back to
1657 # 'sha-1' if unspecified.
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001658 algo = isolated_format.SUPPORTED_ALGOS_REVERSE[data.get('algo', 'sha-1')]
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001659
maruel@chromium.org41601642013-09-18 19:40:46 +00001660 for key, value in data.iteritems():
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001661 if key == 'algo':
1662 if not isinstance(value, basestring):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001663 raise IsolatedError('Expected string, got %r' % value)
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001664 if value not in isolated_format.SUPPORTED_ALGOS:
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001665 raise IsolatedError(
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001666 'Expected one of \'%s\', got %r' %
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001667 (', '.join(sorted(isolated_format.SUPPORTED_ALGOS)), value))
1668 if value != isolated_format.SUPPORTED_ALGOS_REVERSE[algo]:
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001669 raise IsolatedError(
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001670 'Expected \'%s\', got %r' %
1671 (isolated_format.SUPPORTED_ALGOS_REVERSE[algo], value))
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001672
1673 elif key == 'command':
maruel@chromium.org41601642013-09-18 19:40:46 +00001674 if not isinstance(value, list):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001675 raise IsolatedError('Expected list, got %r' % value)
maruel@chromium.org41601642013-09-18 19:40:46 +00001676 if not value:
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001677 raise IsolatedError('Expected non-empty command')
maruel@chromium.org41601642013-09-18 19:40:46 +00001678 for subvalue in value:
1679 if not isinstance(subvalue, basestring):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001680 raise IsolatedError('Expected string, got %r' % subvalue)
maruel@chromium.org41601642013-09-18 19:40:46 +00001681
1682 elif key == 'files':
1683 if not isinstance(value, dict):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001684 raise IsolatedError('Expected dict, got %r' % value)
maruel@chromium.org41601642013-09-18 19:40:46 +00001685 for subkey, subvalue in value.iteritems():
1686 if not isinstance(subkey, basestring):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001687 raise IsolatedError('Expected string, got %r' % subkey)
maruel@chromium.org41601642013-09-18 19:40:46 +00001688 if not isinstance(subvalue, dict):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001689 raise IsolatedError('Expected dict, got %r' % subvalue)
maruel@chromium.org41601642013-09-18 19:40:46 +00001690 for subsubkey, subsubvalue in subvalue.iteritems():
1691 if subsubkey == 'l':
1692 if not isinstance(subsubvalue, basestring):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001693 raise IsolatedError('Expected string, got %r' % subsubvalue)
maruel@chromium.org41601642013-09-18 19:40:46 +00001694 elif subsubkey == 'm':
1695 if not isinstance(subsubvalue, int):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001696 raise IsolatedError('Expected int, got %r' % subsubvalue)
maruel@chromium.org41601642013-09-18 19:40:46 +00001697 elif subsubkey == 'h':
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001698 if not isolated_format.is_valid_hash(subsubvalue, algo):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001699 raise IsolatedError('Expected sha-1, got %r' % subsubvalue)
maruel@chromium.org41601642013-09-18 19:40:46 +00001700 elif subsubkey == 's':
Marc-Antoine Ruelaab3a622013-11-28 09:47:05 -05001701 if not isinstance(subsubvalue, (int, long)):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001702 raise IsolatedError('Expected int or long, got %r' % subsubvalue)
maruel@chromium.org41601642013-09-18 19:40:46 +00001703 else:
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001704 raise IsolatedError('Unknown subsubkey %s' % subsubkey)
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001705 if bool('h' in subvalue) == bool('l' in subvalue):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001706 raise IsolatedError(
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001707 'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
1708 subvalue)
1709 if bool('h' in subvalue) != bool('s' in subvalue):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001710 raise IsolatedError(
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001711 'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
1712 subvalue)
1713 if bool('s' in subvalue) == bool('l' in subvalue):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001714 raise IsolatedError(
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001715 'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
1716 subvalue)
1717 if bool('l' in subvalue) and bool('m' in subvalue):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001718 raise IsolatedError(
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001719 'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
maruel@chromium.org41601642013-09-18 19:40:46 +00001720 subvalue)
1721
1722 elif key == 'includes':
1723 if not isinstance(value, list):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001724 raise IsolatedError('Expected list, got %r' % value)
maruel@chromium.org41601642013-09-18 19:40:46 +00001725 if not value:
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001726 raise IsolatedError('Expected non-empty includes list')
maruel@chromium.org41601642013-09-18 19:40:46 +00001727 for subvalue in value:
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -04001728 if not isolated_format.is_valid_hash(subvalue, algo):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001729 raise IsolatedError('Expected sha-1, got %r' % subvalue)
maruel@chromium.org41601642013-09-18 19:40:46 +00001730
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001731 elif key == 'os':
1732 if version >= (1, 4):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001733 raise IsolatedError('Key \'os\' is not allowed starting version 1.4')
Marc-Antoine Ruel05199462014-03-13 15:40:48 -04001734
maruel@chromium.org41601642013-09-18 19:40:46 +00001735 elif key == 'read_only':
Marc-Antoine Ruel7124e392014-01-09 11:49:21 -05001736 if not value in (0, 1, 2):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001737 raise IsolatedError('Expected 0, 1 or 2, got %r' % value)
maruel@chromium.org41601642013-09-18 19:40:46 +00001738
1739 elif key == 'relative_cwd':
1740 if not isinstance(value, basestring):
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001741 raise IsolatedError('Expected string, got %r' % value)
maruel@chromium.org41601642013-09-18 19:40:46 +00001742
maruel@chromium.org385d73d2013-09-19 18:33:21 +00001743 elif key == 'version':
1744 # Already checked above.
1745 pass
1746
maruel@chromium.org41601642013-09-18 19:40:46 +00001747 else:
Marc-Antoine Ruel1e7658c2014-08-28 19:46:39 -04001748 raise IsolatedError('Unknown key %r' % key)
maruel@chromium.org41601642013-09-18 19:40:46 +00001749
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001750 # Automatically fix os.path.sep if necessary. While .isolated files are always
1751 # in the the native path format, someone could want to download an .isolated
1752 # tree from another OS.
1753 wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
1754 if 'files' in data:
1755 data['files'] = dict(
1756 (k.replace(wrong_path_sep, os.path.sep), v)
1757 for k, v in data['files'].iteritems())
1758 for v in data['files'].itervalues():
1759 if 'l' in v:
1760 v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
1761 if 'relative_cwd' in data:
1762 data['relative_cwd'] = data['relative_cwd'].replace(
1763 wrong_path_sep, os.path.sep)
maruel@chromium.org41601642013-09-18 19:40:46 +00001764 return data
1765
1766
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo

    # Raw decoded .isolated content, filled by load().
    self.data = {}
    # One IsolatedFile instance per hash in self.data['includes'].
    self.children = []

    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False
    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| as .isolated JSON and populates this instance.

    May only be called once per instance.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, self.algo)
    includes = self.data.get('includes', [])
    self.children = [IsolatedFile(digest, self.algo) for digest in includes]
    self._is_parsed = True

  def fetch_files(self, fetch_queue, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if not self._is_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        fetch_queue.add(properties['h'], properties['s'], WorkerPool.MED)
    self.files_fetched = True
1824
1825
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command to run, as a list of arguments.
    self.command = []
    # Aggregated 'files' section: path -> properties dict.
    self.files = {}
    # 0, 1 or 2, or None if no .isolated file specified it.
    self.read_only = None
    # Working directory relative to the mapped root, '' once load() completes.
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, fetch_queue, root_isolated_hash, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Arguments:
      fetch_queue: FetchQueue used to download items.
      root_isolated_hash: hash of the root .isolated file.
      algo: hashlib algorithm class used by the namespace.
    """
    self.root = IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      # Enqueues one .isolated file for download at high priority.
      h = isolated_file.obj_hash
      if h in seen:
        raise IsolatedError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      fetch_queue.add(h, priority=WorkerPool.HIGH)

    retrieve(self.root)

    # Process .isolated files as they arrive, in whatever order the fetches
    # complete; ordering constraints are enforced via the can_fetch flags.
    while pending:
      item_hash = fetch_queue.wait(pending)
      item = pending.pop(item_hash)
      item.load(fetch_queue.cache.read(item_hash))
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(fetch_queue, self.root)

    def check(n):
      # Sanity check: every node in the tree had its files enqueued.
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''

  def _traverse_tree(self, fetch_queue, node):
    # Walks the include tree depth-first, fetching files of fetchable nodes
    # and unlocking at most one new child (the leftmost unfetchable one) per
    # pass, preserving the strict 'includes' ordering.
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(fetch_queue, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(fetch_queue, i)

  def _update_self(self, fetch_queue, node):
    # Merges |node|'s files into self.files and picks up the first-seen
    # command/read_only/relative_cwd values (embedding files win).
    node.fetch_files(fetch_queue, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      # Ensure paths are correctly separated on windows.
      self.command = node.data['command']
      if self.command:
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1920
1921
def fetch_isolated(isolated_hash, storage, cache, outdir, require_command):
  """Aggressively downloads the .isolated file(s), then download all the files.

  Arguments:
    isolated_hash: hash of the root *.isolated file.
    storage: Storage class that communicates with isolate storage.
    cache: LocalCache class that knows how to store and map files locally.
    outdir: Output directory to map file tree to.
    require_command: Ensure *.isolated specifies a command to run.

  Returns:
    Settings object that holds details about loaded *.isolated file.

  Raises:
    MappingError: if a local file argument is not usable, or if the cache
        evicted items that were just fetched.
    IsolatedError: if require_command is True but no command was found.
  """
  logging.debug(
      'fetch_isolated(%s, %s, %s, %s, %s)',
      isolated_hash, storage, cache, outdir, require_command)
  # Hash algorithm to use, defined by namespace |storage| is using.
  algo = storage.hash_algo
  with cache:
    fetch_queue = FetchQueue(storage, cache)
    settings = Settings()

    with tools.Profiler('GetIsolateds'):
      # Optionally support local files by manually adding them to cache.
      if not isolated_format.is_valid_hash(isolated_hash, algo):
        logging.debug('%s is not a valid hash, assuming a file', isolated_hash)
        try:
          # Replaces the file path argument with the hash of its content.
          isolated_hash = fetch_queue.inject_local_file(isolated_hash, algo)
        except IOError:
          raise MappingError(
              '%s doesn\'t seem to be a valid file. Did you intent to pass a '
              'valid hash?' % isolated_hash)

      # Load all *.isolated and start loading rest of the files.
      settings.load(fetch_queue, isolated_hash, algo)
      if require_command and not settings.command:
        # TODO(vadimsh): All fetch operations are already enqueue and there's no
        # easy way to cancel them.
        raise IsolatedError('No command to run')

    with tools.Profiler('GetRest'):
      # Create file system hierarchy.
      if not os.path.isdir(outdir):
        os.makedirs(outdir)
      create_directories(outdir, settings.files)
      create_symlinks(outdir, settings.files.iteritems())

      # Ensure working directory exists.
      cwd = os.path.normpath(os.path.join(outdir, settings.relative_cwd))
      if not os.path.isdir(cwd):
        os.makedirs(cwd)

      # Multimap: digest -> list of pairs (path, props).
      # Several paths may share the same content hash, so each digest maps to
      # every file entry that must be materialized from it.
      remaining = {}
      for filepath, props in settings.files.iteritems():
        if 'h' in props:
          remaining.setdefault(props['h'], []).append((filepath, props))

      # Now block on the remaining files to be downloaded and mapped.
      logging.info('Retrieving remaining files (%d of them)...',
          fetch_queue.pending_count)
      last_update = time.time()
      with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
        while remaining:
          detector.ping()

          # Wait for any item to finish fetching to cache.
          digest = fetch_queue.wait(remaining)

          # Link corresponding files to a fetched item in cache.
          for filepath, props in remaining.pop(digest):
            cache.hardlink(
                digest, os.path.join(outdir, filepath), props.get('m'))

          # Report progress.
          duration = time.time() - last_update
          if duration > DELAY_BETWEEN_UPDATES_IN_SECS:
            msg = '%d files remaining...' % len(remaining)
            print msg
            logging.info(msg)
            last_update = time.time()

    # Cache could evict some items we just tried to fetch, it's a fatal error.
    if not fetch_queue.verify_all_cached():
      raise MappingError('Cache is too small to hold all requested files')
  return settings
2008
2009
def directory_to_metadata(root, algo, blacklist):
  """Hashes the content of a directory tree.

  Arguments:
    root: directory to process.
    algo: hashing algorithm class used on each file.
    blacklist: function returning True for relative paths to skip.

  Returns:
    Tuple (list of FileItem to upload, .isolated 'files' metadata dict).
  """
  root = file_path.get_native_path_case(root)
  follow_symlinks = sys.platform != 'win32'
  relpaths = expand_directory_and_symlink(
      root, '.' + os.path.sep, blacklist, follow_symlinks)
  metadata = {}
  for relpath in relpaths:
    metadata[relpath] = process_input(
        os.path.join(root, relpath), {}, False, algo)
  # The modification timestamp is irrelevant for archiving purposes.
  for props in metadata.itervalues():
    props.pop('t')
  items = []
  for relpath, props in metadata.iteritems():
    # Entries without a hash (e.g. symlinks) have no content to upload.
    if 'h' not in props:
      continue
    items.append(
        FileItem(
            path=os.path.join(root, relpath),
            digest=props['h'],
            size=props['s'],
            high_priority=relpath.endswith('.isolated')))
  return items, metadata
2030
2031
def archive_files_to_storage(storage, files, blacklist):
  """Stores every entries and returns the relevant data.

  Arguments:
    storage: a Storage object that communicates with the remote object store.
    files: list of file paths to upload. If a directory is specified, a
        .isolated file is created and its hash is returned.
    blacklist: function that returns True if a file should be omitted.

  Returns:
    List of (hash, path) pairs, one per entry in |files|, in the same order.

  Raises:
    Error: on duplicate entries, when an entry is neither a file nor a
        directory, or when a path can't be accessed (OSError).
  """
  assert all(isinstance(i, unicode) for i in files), files
  if len(files) != len(set(map(os.path.abspath, files))):
    raise Error('Duplicate entries found.')

  results = []
  # The temporary directory is only created as needed.
  tempdir = None
  try:
    # TODO(maruel): Yield the files to a worker thread.
    items_to_upload = []
    for f in files:
      try:
        filepath = os.path.abspath(f)
        if os.path.isdir(filepath):
          # Uploading a whole directory.
          items, metadata = directory_to_metadata(
              filepath, storage.hash_algo, blacklist)

          # Create the .isolated file.
          # It summarizes the directory content; the directory's files are
          # uploaded too, but the hash reported back to the caller is the one
          # of this generated .isolated file.
          if not tempdir:
            tempdir = tempfile.mkdtemp(prefix='isolateserver')
          handle, isolated = tempfile.mkstemp(dir=tempdir, suffix='.isolated')
          os.close(handle)
          data = {
              'algo':
                  isolated_format.SUPPORTED_ALGOS_REVERSE[storage.hash_algo],
              'files': metadata,
              'version': isolated_format.ISOLATED_FILE_VERSION,
          }
          save_isolated(isolated, data)
          h = isolated_format.hash_file(isolated, storage.hash_algo)
          items_to_upload.extend(items)
          items_to_upload.append(
              FileItem(
                  path=isolated,
                  digest=h,
                  size=os.stat(isolated).st_size,
                  high_priority=True))
          results.append((h, f))

        elif os.path.isfile(filepath):
          h = isolated_format.hash_file(filepath, storage.hash_algo)
          items_to_upload.append(
              FileItem(
                  path=filepath,
                  digest=h,
                  size=os.stat(filepath).st_size,
                  high_priority=f.endswith('.isolated')))
          results.append((h, f))
        else:
          raise Error('%s is neither a file or directory.' % f)
      except OSError:
        raise Error('Failed to process %s.' % f)
    # Technically we would care about which files were uploaded but we don't
    # much in practice.
    _uploaded_files = storage.upload_items(items_to_upload)
    return results
  finally:
    if tempdir:
      shutil.rmtree(tempdir)
2101
2102
def archive(out, namespace, files, blacklist):
  """Archives the given paths and prints '<hash> <path>' for each result.

  Arguments:
    out: isolate server URL or local directory to store the content into.
    namespace: namespace to use on the server.
    files: list of paths to upload; ['-'] means read one path per line from
        stdin.
    blacklist: list of regexps of paths to skip inside directories.

  Raises:
    Error: if there is nothing to upload.
  """
  if files == ['-']:
    # Bug fix: readlines() keeps the trailing line separator, which previously
    # ended up embedded in the file names and made os.path.isdir()/isfile()
    # checks fail downstream. Strip the separators and skip blank lines.
    files = [l.rstrip('\r\n') for l in sys.stdin.readlines()]
    files = [l for l in files if l]

  if not files:
    raise Error('Nothing to upload')

  files = [f.decode('utf-8') for f in files]
  blacklist = tools.gen_blacklist(blacklist)
  with get_storage(out, namespace) as storage:
    results = archive_files_to_storage(storage, files, blacklist)
    print('\n'.join('%s %s' % (r[0], r[1]) for r in results))
2115
2116
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  # --indir is not supported here; archiving to a directory is CMDhashtable.
  add_isolate_server_options(parser, False)
  parser.add_option(
      '--blacklist',
      action='append', default=list(DEFAULT_BLACKLIST),
      help='List of regexp to use as blacklist filter when uploading '
           'directories')
  options, files = parser.parse_args(args)
  process_isolate_server_options(parser, options)
  # Only authenticate when the destination is an actual URL.
  if file_path.is_url(options.isolate_server):
    auth.ensure_logged_in(options.isolate_server)
  try:
    archive(options.isolate_server, options.namespace, files, options.blacklist)
  except Error as e:
    # Error carries a user friendly message; surface it through the parser.
    parser.error(e.args[0])
  return 0
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002144
2145
def CMDdownload(parser, args):
  """Download data from the server.

  It can either download individual files or a complete tree from a .isolated
  file.

  Returns 0 on success; aborts through parser.error() on bad arguments.
  """
  add_isolate_server_options(parser, True)
  parser.add_option(
      '-i', '--isolated', metavar='HASH',
      help='hash of an isolated file, .isolated file content is discarded, use '
           '--file if you need it')
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default=os.getcwd(),
      help='destination directory')
  options, args = parser.parse_args(args)
  process_isolate_server_options(parser, options)
  if args:
    parser.error('Unsupported arguments: %s' % args)
  # Exactly one of the two modes must be selected.
  if bool(options.isolated) == bool(options.file):
    parser.error('Use one of --isolated or --file, and only one.')

  options.target = os.path.abspath(options.target)

  remote = options.isolate_server or options.indir
  if file_path.is_url(remote):
    auth.ensure_logged_in(remote)

  with get_storage(remote, options.namespace) as storage:
    # Fetching individual files.
    if options.file:
      channel = threading_utils.TaskChannel()
      pending = {}
      for digest, dest in options.file:
        pending[digest] = dest
        storage.async_fetch(
            channel,
            WorkerPool.MED,
            digest,
            isolated_format.UNKNOWN_FILE_SIZE,
            functools.partial(file_write, os.path.join(options.target, dest)))
      while pending:
        fetched = channel.pull()
        dest = pending.pop(fetched)
        logging.info('%s: %s', fetched, dest)

    # Fetching whole isolated tree.
    if options.isolated:
      settings = fetch_isolated(
          isolated_hash=options.isolated,
          storage=storage,
          cache=MemoryCache(),
          outdir=options.target,
          require_command=False)
      # |rel| is already absolute since options.target was made absolute above.
      # The previous code joined options.target with |rel| a second time, which
      # was a redundant no-op (os.path.join ignores the first argument when the
      # second is absolute).
      rel = os.path.join(options.target, settings.relative_cwd)
      print('To run this test please run from the directory %s:' % rel)
      print(' ' + ' '.join(settings.command))

  return 0
2208
2209
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDhashtable(parser, args):
  """Archives data to a hashtable on the file system.

  If a directory is specified, a .isolated file is created the whole directory
  is uploaded. Then this .isolated file can be included in another one to run
  commands.

  The commands output each file that was processed with its content hash. For
  directories, the .isolated generated for the directory is listed as the
  directory entry itself.
  """
  add_outdir_options(parser)
  parser.add_option(
      '--blacklist',
      action='append', default=list(DEFAULT_BLACKLIST),
      help='List of regexp to use as blacklist filter when uploading '
           'directories')
  options, files = parser.parse_args(args)
  # Resolves --outdir relative to the current working directory.
  process_outdir_options(parser, options, os.getcwd())
  try:
    # Do not compress files when archiving to the file system.
    archive(options.outdir, 'default', files, options.blacklist)
  except Error as e:
    # Error carries a user friendly message; surface it through the parser.
    parser.error(e.args[0])
  return 0
2236
2237
def add_isolate_server_options(parser, add_indir):
  """Registers the isolate server related command line options.

  Arguments:
    parser: OptionParser instance to register the options on.
    add_indir: when True, also registers --indir to allow a local file system
        directory to be used instead of an isolate server.
  """
  parser.add_option(
      '-I', '--isolate-server',
      metavar='URL',
      default=os.environ.get('ISOLATE_SERVER', ''),
      help='URL of the Isolate Server to use. Defaults to the environment '
           'variable ISOLATE_SERVER if set. No need to specify https://, this '
           'is assumed.')
  parser.add_option(
      '--namespace',
      default='default-gzip',
      help='The namespace to use on the Isolate Server, default: %default')
  if not add_indir:
    return
  parser.add_option(
      '--indir',
      metavar='DIR',
      help='Directory used to store the hashtable instead of using an '
           'isolate server.')
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002257
2258
def process_isolate_server_options(parser, options):
  """Processes the --isolate-server and --indir options and aborts if neither is
  specified.

  On success, options.isolate_server is normalized (scheme defaults to https,
  trailing slashes stripped) or options.indir is converted to a native
  absolute path.
  """
  # --indir only exists on parsers built with add_indir=True.
  has_indir = hasattr(options, 'indir')
  if not options.isolate_server:
    if not has_indir:
      parser.error('--isolate-server is required.')
    elif not options.indir:
      parser.error('Use one of --indir or --isolate-server.')
  else:
    if has_indir and options.indir:
      parser.error('Use only one of --indir or --isolate-server.')

  if options.isolate_server:
    # 'https' is the scheme used when the URL doesn't specify one.
    parts = urlparse.urlparse(options.isolate_server, 'https')
    if parts.query:
      parser.error('--isolate-server doesn\'t support query parameter.')
    if parts.fragment:
      parser.error('--isolate-server doesn\'t support fragment in the url.')
    # urlparse('foo.com') will result in netloc='', path='foo.com', which is not
    # what is desired here.
    new = list(parts)
    if not new[1] and new[2]:
      new[1] = new[2].rstrip('/')
      new[2] = ''
    new[2] = new[2].rstrip('/')
    options.isolate_server = urlparse.urlunparse(new)
    on_error.report_on_exception_exit(options.isolate_server)
    return

  if file_path.is_url(options.indir):
    parser.error('Can\'t use an URL for --indir.')
  # Normalize --indir to a native absolute path and require it to exist.
  options.indir = unicode(options.indir).replace('/', os.path.sep)
  options.indir = os.path.abspath(
      os.path.normpath(os.path.join(os.getcwd(), options.indir)))
  if not os.path.isdir(options.indir):
    parser.error('Path given to --indir must exist.')
2297
Marc-Antoine Ruel1687b5e2014-02-06 17:47:53 -05002298
2299
def add_outdir_options(parser):
  """Registers -o/--outdir on |parser|.

  --outdir is orthogonal to --isolate-server: on upload, separate commands are
  used between 'archive' and 'hashtable', while on 'download' the same command
  can fetch from either an isolate server or a file system.
  """
  parser.add_option(
      '-o', '--outdir',
      metavar='DIR',
      help='Directory used to recreate the tree.')
2310
2311
def process_outdir_options(parser, options, cwd):
  """Validates --outdir and converts it to a native absolute path.

  Aborts via parser.error() when --outdir is missing or is an URL.
  """
  if not options.outdir:
    parser.error('--outdir is required.')
  if file_path.is_url(options.outdir):
    parser.error('Can\'t use an URL for --outdir.')
  # outdir doesn't need native path case since tracing is never done from
  # there; only the separators are normalized.
  native = unicode(options.outdir).replace('/', os.path.sep)
  options.outdir = os.path.abspath(
      os.path.normpath(os.path.join(cwd, native)))
  # The directory is deliberately not created here: defer it until the command
  # line is known to be fully valid.
2323
2324
class OptionParserIsolateServer(tools.OptionParserWithLogging):
  """Option parser that bundles logging and authentication options."""

  def __init__(self, **kwargs):
    prog = os.path.basename(sys.modules[__name__].__file__)
    tools.OptionParserWithLogging.__init__(
        self,
        version=__version__,
        prog=prog,
        **kwargs)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    """Parses arguments, then applies the authentication options."""
    options, remaining = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    auth.process_auth_options(self, options)
    return options, remaining
2339
2340
def main(args):
  """Dispatches |args| to the matching CMD* handler and returns its exit code."""
  return subcommand.CommandDispatcher(__name__).execute(
      OptionParserIsolateServer(), args)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00002344
2345
if __name__ == '__main__':
  # Terminal/encoding setup runs before main() so all command output benefits.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))