blob: 3667bc9b53f18178655cfd3ef2af5bd84af3ea60 [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002# Copyright 2013 The Chromium Authors. All rights reserved.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00003# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Archives a set of files to a server."""
7
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00008__version__ = '0.2'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000010import binascii
11import hashlib
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +000012import itertools
maruel@chromium.org41601642013-09-18 19:40:46 +000013import json
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000014import logging
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000015import os
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000016import random
17import re
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000018import sys
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000019import threading
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000020import time
maruel@chromium.orge82112e2013-04-24 14:41:55 +000021import urllib
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000022import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000023
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000024from third_party import colorama
25from third_party.depot_tools import fix_encoding
26from third_party.depot_tools import subcommand
27
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000028from utils import net
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000029from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000030from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000031
32
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000033# The minimum size of files to upload directly to the blobstore.
maruel@chromium.orgaef29f82012-12-12 15:00:42 +000034MIN_SIZE_FOR_DIRECT_BLOBSTORE = 20 * 1024
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000035
vadimsh@chromium.orgeea52422013-08-21 19:35:54 +000036# The number of files to check the isolate server per /contains query.
37# All files are sorted by likelihood of a change in the file content
38# (currently file size is used to estimate this: larger the file -> larger the
39# possibility it has changed). Then first ITEMS_PER_CONTAINS_QUERIES[0] files
40# are taken and send to '/contains', then next ITEMS_PER_CONTAINS_QUERIES[1],
41# and so on. Numbers here is a trade-off; the more per request, the lower the
42# effect of HTTP round trip latency and TCP-level chattiness. On the other hand,
43# larger values cause longer lookups, increasing the initial latency to start
44# uploading, which is especially an issue for large files. This value is
45# optimized for the "few thousands files to look up with minimal number of large
46# files missing" case.
47ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]
csharp@chromium.org07fa7592013-01-11 18:19:30 +000048
maruel@chromium.org9958e4a2013-09-17 00:01:48 +000049
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000050# A list of already compressed extension types that should not receive any
51# compression before being uploaded.
52ALREADY_COMPRESSED_TYPES = [
53 '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'pdf', 'png',
54 'wav', 'zip'
55]
56
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000057
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000058# The file size to be used when we don't know the correct file size,
59# generally used for .isolated files.
60UNKNOWN_FILE_SIZE = None
61
62
63# The size of each chunk to read when downloading and unzipping files.
64ZIPPED_FILE_CHUNK = 16 * 1024
65
66
maruel@chromium.org8750e4b2013-09-18 02:37:57 +000067# Chunk size to use when doing disk I/O.
68DISK_FILE_CHUNK = 1024 * 1024
69
70
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000071# Read timeout in seconds for downloads from isolate storage. If there's no
72# response from the server within this timeout whole download will be aborted.
73DOWNLOAD_READ_TIMEOUT = 60
74
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +000075# Maximum expected delay (in seconds) between successive file fetches
76# in run_tha_test. If it takes longer than that, a deadlock might be happening
77# and all stack frames for all threads are dumped to log.
78DEADLOCK_TIMEOUT = 5 * 60
79
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000080
maruel@chromium.org41601642013-09-18 19:40:46 +000081# The delay (in seconds) to wait between logging statements when retrieving
82# the required files. This is intended to let the user (or buildbot) know that
83# the program is still running.
84DELAY_BETWEEN_UPDATES_IN_SECS = 30
85
86
maruel@chromium.org385d73d2013-09-19 18:33:21 +000087# Sadly, hashlib uses 'sha1' instead of the standard 'sha-1' so explicitly
88# specify the names here.
89SUPPORTED_ALGOS = {
90 'md5': hashlib.md5,
91 'sha-1': hashlib.sha1,
92 'sha-512': hashlib.sha512,
93}
94
95
96# Used for serialization.
97SUPPORTED_ALGOS_REVERSE = dict((v, k) for k, v in SUPPORTED_ALGOS.iteritems())
98
99
maruel@chromium.orgdedbf492013-09-12 20:42:11 +0000100class ConfigError(ValueError):
101 """Generic failure to load a .isolated file."""
102 pass
103
104
105class MappingError(OSError):
106 """Failed to recreate the tree."""
107 pass
108
109
def randomness():
  """Returns a low-entropy random string used for MIME boundary generation.

  Kept as a standalone function so unit tests can mock it out.
  """
  now = time.time()
  return str(now)
117
def encode_multipart_formdata(fields, files,
                              mime_mapper=lambda _: 'application/octet-stream'):
  """Encodes a Multipart form data object.

  Args:
    fields: a sequence (name, value) elements for
      regular form fields.
    files: a sequence of (name, filename, value) elements for data to be
      uploaded as files.
    mime_mapper: function to return the mime type from the filename.
  Returns:
    content_type: for httplib.HTTP instance
    body: for httplib.HTTP instance
  """
  boundary = hashlib.md5(randomness()).hexdigest()
  body_list = []
  for (key, value) in fields:
    # Bug fix: the encoded key/filename used to be assigned to |value|,
    # clobbering the field content and leaving the name unencoded.
    if isinstance(key, unicode):
      key = key.encode('utf-8')
    if isinstance(value, unicode):
      value = value.encode('utf-8')
    body_list.append('--' + boundary)
    body_list.append('Content-Disposition: form-data; name="%s"' % key)
    body_list.append('')
    body_list.append(value)
    body_list.append('--' + boundary)
    body_list.append('')
  for (key, filename, value) in files:
    if isinstance(key, unicode):
      key = key.encode('utf-8')
    if isinstance(filename, unicode):
      filename = filename.encode('utf-8')
    if isinstance(value, unicode):
      value = value.encode('utf-8')
    body_list.append('--' + boundary)
    body_list.append('Content-Disposition: form-data; name="%s"; '
                     'filename="%s"' % (key, filename))
    body_list.append('Content-Type: %s' % mime_mapper(filename))
    body_list.append('')
    body_list.append(value)
    body_list.append('--' + boundary)
    body_list.append('')
  if body_list:
    # Terminate the final boundary with '--' per RFC 2046.
    body_list[-2] += '--'
  body = '\r\n'.join(body_list)
  content_type = 'multipart/form-data; boundary=%s' % boundary
  return content_type, body
165
166
def is_valid_hash(value, algo):
  """Returns True if |value| is a well-formed hex digest for hashlib |algo|."""
  expected_len = algo().digest_size * 2
  return re.match(r'^[a-fA-F0-9]{%d}$' % expected_len, value) is not None
171
172
def hash_file(filepath, algo):
  """Calculates the hash of a file without reading it all in memory at once.

  |algo| should be one of hashlib hashing algorithm.
  """
  hasher = algo()
  with open(filepath, 'rb') as stream:
    while True:
      block = stream.read(DISK_FILE_CHUNK)
      if not block:
        return hasher.hexdigest()
      hasher.update(block)
186
187
def file_read(filepath, chunk_size=DISK_FILE_CHUNK):
  """Yields file content in chunks of given |chunk_size|."""
  with open(filepath, 'rb') as stream:
    while True:
      piece = stream.read(chunk_size)
      if not piece:
        return
      yield piece
196
197
def file_write(filepath, content_generator):
  """Writes file content as generated by content_generator.

  Creates the intermediary directory as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  parent = os.path.dirname(filepath)
  if not os.path.isdir(parent):
    os.makedirs(parent)
  written = 0
  with open(filepath, 'wb') as out:
    for chunk in content_generator:
      out.write(chunk)
      written += len(chunk)
  return written
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000216
217
def zip_compress(content_generator, level=7):
  """Reads chunks from |content_generator| and yields zip compressed chunks."""
  deflater = zlib.compressobj(level)
  for piece in content_generator:
    squeezed = deflater.compress(piece)
    if squeezed:
      yield squeezed
  # Emit whatever is left in the compressor's internal buffer.
  leftover = deflater.flush(zlib.Z_FINISH)
  if leftover:
    yield leftover
228
229
def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use.

  Returns 0 (store, no compression) for file types that are already
  compressed, 7 otherwise.
  """
  # os.path.splitext() keeps the leading dot ('.zip') while
  # ALREADY_COMPRESSED_TYPES lists bare extensions ('zip'), so the dot must be
  # stripped; otherwise the membership test never matches and already
  # compressed files are pointlessly recompressed.
  file_ext = os.path.splitext(filename)[1].lower().lstrip('.')
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
235
236
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files."""
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect every ancestor directory of every file path.
  to_create = set()
  for filepath in files:
    subdir = os.path.dirname(filepath)
    while subdir and subdir not in to_create:
      to_create.add(subdir)
      subdir = os.path.dirname(subdir)
  # Sorted order guarantees a parent is always created before its children.
  for subdir in sorted(to_create):
    os.mkdir(os.path.join(base_directory, subdir))
249
250
def create_links(base_directory, files):
  """Creates any links needed by the given set of files."""
  on_windows = sys.platform == 'win32'
  for filepath, properties in files:
    # Only entries carrying a 'l' (link) property need work.
    if 'l' not in properties:
      continue
    if on_windows:
      # TODO(maruel): Create junctions or empty text files similar to what
      # cygwin do?
      logging.warning('Ignoring symlink %s', filepath)
      continue
    outfile = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'link' property should
    # never be specified for windows .isolated file.
    os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    if 'm' in properties:
      # lchmod is not available on every platform; apply mode only when it is.
      mode_setter = getattr(os, 'lchmod', None)
      if mode_setter:
        mode_setter(outfile, properties['m'])
269
270
def generate_remaining_files(files):
  """Generates a dictionary of all the remaining files to be downloaded.

  Maps each content hash to the list of (path, properties) entries needing it.
  """
  grouped = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    grouped.setdefault(props['h'], []).append((filepath, props))
  return grouped
279
280
def is_valid_file(filepath, size):
  """Returns True if the file on disk matches the expected |size|.

  A |size| of UNKNOWN_FILE_SIZE disables the check entirely.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  actual = os.stat(filepath).st_size
  if actual == size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), actual, size)
  return False
295
296
def try_remove(filepath):
  """Best-effort file removal; a missing file is not an error."""
  try:
    os.remove(filepath)
  except OSError:
    # Either the file never existed or it was deleted concurrently;
    # both outcomes are acceptable.
    pass
303
304
def url_read(url, **kwargs):
  """Reads |url| via net.url_read(), raising MappingError on total failure.

  net.url_read() returns None once all its internal retries are exhausted;
  that is treated here as the server being unreachable.
  """
  result = net.url_read(url, **kwargs)
  if result is None:
    # If we get no response from the server, assume it is down and raise an
    # exception.
    raise MappingError('Unable to connect to server %s' % url)
  return result
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000312
313
class StorageApi(object):
  """Base class for object that can download and upload files."""

  def __init__(self):
    # WorkerPool used by async_push(); must be installed via set_pool()
    # before any asynchronous upload is started.
    self._pool = None

  def set_pool(self, pool):
    """Sets WorkerPool that can be used for parallel uploads."""
    self._pool = pool

  def fetch(self, item, expected_size):
    """Fetches an object and yields its content."""
    raise NotImplementedError()

  def push(self, item, expected_size, content_generator, push_urls=None):
    """Uploads content generated by |content_generator| as |item|."""
    raise NotImplementedError()

  def check_missing_files(self, files):
    """Checks for existence of given |files| on the server.

    Arguments:
      files: list of pairs (file name, metadata dict).

    Returns:
      A list of files missing on server as a list of triplets
      (file name, metadata dict, push_urls object to pass to push).
    """
    raise NotImplementedError()

  def get_missing_files(self, files):
    """Yields files that are missing from the server.

    Issues multiple parallel queries via check_missing_files method calls.

    Arguments:
      files: a dictionary file name -> metadata dict.

    Yields:
      Triplets (file name, metadata dict, push_urls object to pass to push).
    """
    # TODO(maruel, vadimsh): Reuse self._pool here.
    with threading_utils.ThreadPool(1, 16, 0, prefix='get_missing_files') as tp:
      for batch in self.batch_files_for_check(files):
        tp.add_task(0, self.check_missing_files, batch)
      # Each task returns a list of missing triplets; flatten them as they
      # complete so callers can start uploading before all queries finish.
      for missing in itertools.chain.from_iterable(tp.iter_results()):
        yield missing

  def async_push(self, priority, item, expected_size,
                 content_generator, push_urls=None):
    """Starts asynchronous push to the server in a parallel thread."""
    # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
    # content right here. It will block until all file is zipped.
    data = ''.join(content_generator)
    self._pool.add_task(
        priority, self.push, item, expected_size, [data], push_urls)

  @staticmethod
  def batch_files_for_check(files):
    """Splits list of files to check for existence on the server into batches.

    Each batch corresponds to a single 'exists?' query to the server via a call
    to check_missing_files method.

    Arguments:
      files: a dictionary file name -> metadata dict.

    Yields:
      Batches of files to query for existence in a single operation,
      each batch is a list of pairs: (file name, metadata dict).
    """
    batch_count = 0
    batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[0]
    next_queries = []
    # Only files with a known size ('s') can be sorted by change likelihood.
    items = ((k, v) for k, v in files.iteritems() if 's' in v)
    # Largest files first: they are the most likely to be missing, so probing
    # them early (in the smaller leading batches) minimizes upload latency.
    for filename, metadata in sorted(items, key=lambda x: -x[1]['s']):
      next_queries.append((filename, metadata))
      if len(next_queries) == batch_size_limit:
        yield next_queries
        next_queries = []
        batch_count += 1
        # Batch sizes grow per ITEMS_PER_CONTAINS_QUERIES, clamped at its tail.
        batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[
            min(batch_count, len(ITEMS_PER_CONTAINS_QUERIES) - 1)]
    if next_queries:
      yield next_queries
399
400
class IsolateServer(StorageApi):
  """Client class to download or upload to Isolate Server."""
  def __init__(self, base_url, namespace):
    super(IsolateServer, self).__init__()
    assert base_url.startswith('http'), base_url
    self.content_url = base_url.rstrip('/') + '/content/'
    self.namespace = namespace
    self.algo = get_hash_algo(namespace)
    # Lazily-fetched auth token, guarded by _lock (see token property).
    self._token = None
    self._lock = threading.Lock()

  @property
  def token(self):
    """Fetches (once) and caches the access token; thread-safe."""
    # TODO(maruel): Make this request much earlier asynchronously while the
    # files are being enumerated.
    with self._lock:
      if not self._token:
        self._token = urllib.quote(url_read(self.content_url + 'get_token'))
      return self._token

  def fetch(self, item, expected_size):
    """Fetches an object and yields its content."""
    assert isinstance(item, basestring)
    assert (
        isinstance(expected_size, (int, long)) or
        expected_size == UNKNOWN_FILE_SIZE)
    zipped_url = '%sretrieve/%s/%s' % (self.content_url, self.namespace, item)
    logging.debug('download_file(%s)', zipped_url)

    # Because the app engine DB is only eventually consistent, retry 404 errors
    # because the file might just not be visible yet (even though it has been
    # uploaded).
    connection = net.url_open(
        zipped_url, retry_404=True, read_timeout=DOWNLOAD_READ_TIMEOUT)
    if not connection:
      raise IOError('Unable to open connection to %s' % zipped_url)

    # TODO(maruel): Must only decompress when needed.
    decompressor = zlib.decompressobj()
    try:
      compressed_size = 0
      decompressed_size = 0
      # Stream-decompress: read compressed chunks and yield decompressed data
      # without ever holding the whole file in memory.
      while True:
        chunk = connection.read(ZIPPED_FILE_CHUNK)
        if not chunk:
          break
        compressed_size += len(chunk)
        decompressed = decompressor.decompress(chunk)
        decompressed_size += len(decompressed)
        yield decompressed

      # Ensure that all the data was properly decompressed.
      uncompressed_data = decompressor.flush()
      if uncompressed_data:
        raise IOError('Decompression failed')
      if (expected_size != UNKNOWN_FILE_SIZE and
          decompressed_size != expected_size):
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (item, decompressed_size, expected_size))
    except zlib.error as e:
      msg = 'Corrupted zlib for item %s. Processed %d of %s bytes.\n%s' % (
          item, compressed_size, connection.content_length, e)
      logging.warning(msg)

      # Testing seems to show that if a few machines are trying to download
      # the same blob, they can cause each other to fail. So if we hit a zip
      # error, this is the most likely cause (it only downloads some of the
      # data). Randomly sleep for between 5 and 25 seconds to try and spread
      # out the downloads.
      sleep_duration = (random.random() * 20) + 5
      time.sleep(sleep_duration)
      raise IOError(msg)

  def push(self, item, expected_size, content_generator, push_urls=None):
    """Uploads content generated by |content_generator| as |item| to the remote
    isolate server.
    """
    assert isinstance(item, basestring)
    assert isinstance(expected_size, int) or expected_size == UNKNOWN_FILE_SIZE
    item = str(item)
    # TODO(maruel): Support large files. This would require streaming support.
    content = ''.join(content_generator)
    # Large payloads bypass the regular store endpoint and go straight to the
    # blobstore via a generated upload URL.
    if len(content) > MIN_SIZE_FOR_DIRECT_BLOBSTORE:
      return self._upload_hash_content_to_blobstore(item, content)

    url = '%sstore/%s/%s?token=%s' % (
        self.content_url, self.namespace, item, self.token)
    return url_read(
        url, data=content, content_type='application/octet-stream')

  def check_missing_files(self, files):
    """Checks for existence of given |files| on the server."""
    logging.info('Checking existence of %d files...', len(files))

    # The /contains endpoint takes the raw concatenated binary digests.
    body = ''.join(
        (binascii.unhexlify(metadata['h']) for (_, metadata) in files))
    assert (len(body) % self.algo().digest_size) == 0, repr(body)

    query_url = '%scontains/%s?token=%s' % (
        self.content_url, self.namespace, self.token)
    response = url_read(
        query_url, data=body, content_type='application/octet-stream')
    # The server answers with one byte per queried file.
    if len(files) != len(response):
      raise MappingError(
          'Got an incorrect number of responses from the server. Expected %d, '
          'but got %d' % (len(files), len(response)))

    # This implementation of IsolateServer doesn't use push_urls field,
    # set it to None.
    missing_files = [
        files[i] + (None,) for i, flag in enumerate(response) if flag == '\x00'
    ]
    logging.info('Queried %d files, %d cache hit',
                 len(files), len(files) - len(missing_files))
    return missing_files

  def _upload_hash_content_to_blobstore(self, item, content):
    """Uploads the content directly to the blobstore via a generated url."""
    # TODO(maruel): Support large files. This would require streaming support.
    gen_url = '%sgenerate_blobstore_url/%s/%s' % (
        self.content_url, self.namespace, item)
    # Token is guaranteed to be already quoted but it is unnecessary here, and
    # only here.
    data = [('token', urllib.unquote(self.token))]
    content_type, body = encode_multipart_formdata(
        data, [('content', item, content)])
    last_url = gen_url
    for _ in net.retry_loop(max_attempts=net.URL_OPEN_MAX_ATTEMPTS):
      # Retry HTTP 50x here but not 404.
      upload_url = net.url_read(gen_url, data=data)
      if not upload_url:
        raise MappingError('Unable to connect to server %s' % gen_url)
      last_url = upload_url

      # Do not retry this request on HTTP 50x. Regenerate an upload url each
      # time since uploading "consumes" the upload url.
      result = net.url_read(
          upload_url, data=body, content_type=content_type, retry_50x=False)
      if result is not None:
        return result
    raise MappingError('Unable to connect to server %s' % last_url)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000542
543
class FileSystem(StorageApi):
  """Storage implementation backed by a directory on the file system.

  The common use case is a NFS/CIFS file server that is mounted locally that is
  used to fetch the file on a local partition.
  """
  def __init__(self, base_path):
    super(FileSystem, self).__init__()
    # Root directory all items live under.
    self.base_path = base_path

  def fetch(self, item, expected_size):
    """Yields the content of |item|, validating its size first when known."""
    assert isinstance(item, basestring)
    assert isinstance(expected_size, int) or expected_size == UNKNOWN_FILE_SIZE
    path = os.path.join(self.base_path, item)
    size_known = expected_size != UNKNOWN_FILE_SIZE
    if size_known and not is_valid_file(path, expected_size):
      raise IOError('Invalid file %s' % item)
    return file_read(path)

  def push(self, item, expected_size, content_generator, push_urls=None):
    """Writes the generated content to disk, rejecting size mismatches."""
    assert isinstance(item, basestring)
    assert isinstance(expected_size, int) or expected_size == UNKNOWN_FILE_SIZE
    destination = os.path.join(self.base_path, item)
    written = file_write(destination, content_generator)
    if expected_size == UNKNOWN_FILE_SIZE or written == expected_size:
      return
    # A truncated or oversized write would poison the storage; drop it.
    os.remove(destination)
    raise IOError(
        'Invalid file %s, %d != %d' % (item, written, expected_size))

  def check_missing_files(self, files):
    """Returns the subset of |files| whose hashed content is absent on disk."""
    missing = []
    for filename, metadata in files:
      if not os.path.exists(os.path.join(self.base_path, metadata['h'])):
        missing.append((filename, metadata, None))
    return missing
579
580
def get_hash_algo(_namespace):
  """Returns the hashlib constructor to use for the given |namespace|.

  Currently hard-coded to SHA-1 regardless of the namespace.
  """
  # TODO(vadimsh): Implement this at some point.
  return hashlib.sha1
585
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000586
def get_storage_api(file_or_url, namespace):
  """Returns an object that implements .fetch() and .push().

  Picks IsolateServer for http(s) URLs and FileSystem for local paths.
  """
  if re.match(r'^https?://.+$', file_or_url):
    return IsolateServer(file_or_url, namespace)
  return FileSystem(file_or_url)
593
594
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """Thread pool that automatically retries on IOError and runs a preconfigured
  function.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Number of times a task raising IOError is retried before giving up.
  RETRIES = 5

  def __init__(self):
    # Positional arguments of AutoRetryThreadPool: retried exception types,
    # retry count, initial/max worker counts, queue size (0 = unbounded),
    # and the thread name prefix.
    super(WorkerPool, self).__init__(
        [IOError],
        self.RETRIES,
        self.INITIAL_WORKERS,
        self.MAX_WORKERS,
        0,
        'remote')
maruel@chromium.orgdedbf492013-09-12 20:42:11 +0000612
maruel@chromium.orgdedbf492013-09-12 20:42:11 +0000613
def upload_tree(base_url, indir, infiles, namespace):
  """Uploads the given tree to the given url.

  Arguments:
    base_url: The base url, it is assume that |base_url|/has/ can be used to
              query if an element was already uploaded, and |base_url|/store/
              can be used to upload a new element.
    indir: Root directory the infiles are based in.
    infiles: dict of files to upload files from |indir| to |base_url|.
    namespace: The namespace to use on the server.

  Returns:
    0 on success (always; failures raise).
  """
  logging.info('upload tree(base_url=%s, indir=%s, files=%d)' %
               (base_url, indir, len(infiles)))

  # Create a pool of workers to zip and upload any files missing from
  # the server.
  cpus = max(threading_utils.num_processors(), 2)
  uploaded = []

  with WorkerPool() as upload_pool:
    remote = get_storage_api(base_url, namespace)
    remote.set_pool(upload_pool)

    def zip_and_trigger_push(filename, metadata, push_urls):
      """Read the file, zips it and trigger push to the storage."""
      # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
      path = os.path.join(indir, filename)
      content_generator = zip_compress(file_read(path, ZIPPED_FILE_CHUNK),
                                       get_zip_compression_level(path))
      # A '0' priority marker in the metadata promotes the upload.
      if metadata.get('priority', '1') == '0':
        priority = WorkerPool.HIGH
      else:
        priority = WorkerPool.MED
      return remote.async_push(
          priority, metadata['h'], UNKNOWN_FILE_SIZE,
          content_generator, push_urls)

    # Separate pool for CPU-bound zipping; uploads run on upload_pool.
    with threading_utils.ThreadPool(2, cpus, 0, 'zip') as zip_pool:
      for filename, metadata, push_urls in remote.get_missing_files(infiles):
        zip_pool.add_task(0, zip_and_trigger_push,
                          filename, metadata, push_urls)
        uploaded.append((filename, metadata))
      logging.info('Waiting for all files to finish zipping')
      zip_pool.join()

    logging.info('All files zipped.')
    upload_pool.join()

  logging.info('All files are uploaded')

  # The remainder only computes and logs cache hit/miss statistics.
  total = len(infiles)
  total_size = sum(metadata.get('s', 0) for metadata in infiles.itervalues())
  logging.info(
      'Total: %6d, %9.1fkb',
      total,
      sum(m.get('s', 0) for m in infiles.itervalues()) / 1024.)
  cache_hit = set(infiles.iterkeys()) - set(x[0] for x in uploaded)
  cache_hit_size = sum(infiles[i].get('s', 0) for i in cache_hit)
  logging.info(
      'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
      len(cache_hit),
      cache_hit_size / 1024.,
      len(cache_hit) * 100. / total,
      cache_hit_size * 100. / total_size if total_size else 0)
  cache_miss = uploaded
  cache_miss_size = sum(infiles[i[0]].get('s', 0) for i in cache_miss)
  logging.info(
      'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
      len(cache_miss),
      cache_miss_size / 1024.,
      len(cache_miss) * 100. / total,
      cache_miss_size * 100. / total_size if total_size else 0)
  return 0
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000687
688
class MemoryCache(object):
  """Cache-compatible store that keeps every retrieved file in memory.

  Drop-in replacement for the Cache class: instead of saving files into a
  cache directory, their content is held in a dict so it can be written to
  the target directory directly.
  """

  def __init__(self, remote):
    self.remote = remote
    self._pool = None
    self._lock = threading.Lock()
    self._contents = {}

  def set_pool(self, pool):
    self._pool = pool

  def retrieve(self, priority, item, size):
    """Gets the requested file."""
    self._pool.add_task(priority, self._on_content, item, size)

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Anything already fetched can be returned right away.
    with self._lock:
      for candidate in items:
        if candidate in self._contents:
          return candidate

    # Otherwise block on the worker pool until a wanted item lands.
    while True:
      fetched = self._pool.get_one_result()
      if fetched in items:
        return fetched

  def add(self, filepath, item):
    with self._lock:
      with open(filepath, 'rb') as f:
        self._contents[item] = f.read()

  def read(self, item):
    return self._contents[item]

  def store_to(self, item, dest):
    file_write(dest, [self._contents[item]])

  def _on_content(self, item, size):
    """Worker task: fetches |item| from the remote and caches its content."""
    data = ''.join(self.remote.fetch(item, size))
    with self._lock:
      self._contents[item] = data
    return item

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    with self._lock:
      self._contents = {}
    return False
749
750
def load_isolated(content, os_flavor, algo):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  Arguments:
  - content: raw serialized content to load.
  - os_flavor: OS to load this file on. Optional.
  - algo: hashlib algorithm class. Used to confirm the algorithm matches the
    algorithm used on the Isolate Server.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  # Check 'version' first, since it could modify the parsing after.
  value = data.get('version', '1.0')
  if not isinstance(value, basestring):
    raise ConfigError('Expected string, got %r' % value)
  if not re.match(r'^(\d+)\.(\d+)$', value):
    raise ConfigError('Expected a compatible version, got %r' % value)
  # Only major version 1 is understood by this parser.
  if value.split('.', 1)[0] != '1':
    raise ConfigError('Expected compatible \'1.x\' version, got %r' % value)

  if algo is None:
    # Default the algorithm used in the .isolated file itself, falls back to
    # 'sha-1' if unspecified.
    algo = SUPPORTED_ALGOS_REVERSE[data.get('algo', 'sha-1')]

  for key, value in data.iteritems():
    if key == 'algo':
      # The declared hash algorithm must agree with the one the caller (or
      # the defaulting logic above) selected.
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)
      if value not in SUPPORTED_ALGOS:
        raise ConfigError(
            'Expected one of \'%s\', got %r' %
            (', '.join(sorted(SUPPORTED_ALGOS)), value))
      if value != SUPPORTED_ALGOS_REVERSE[algo]:
        raise ConfigError(
            'Expected \'%s\', got %r' % (SUPPORTED_ALGOS_REVERSE[algo], value))

    elif key == 'command':
      # Command to run: a non-empty list of string arguments.
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      # Maps a file path to its properties:
      #   'h': content hash, 's': size, 'm': mode, 'l': link target.
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_valid_hash(subsubvalue, algo):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        # Cross-field constraints: an entry is either a link ('l' only) or a
        # regular file ('h' plus 's', never combined with 'l' or, for a link,
        # 'm').
        if bool('h' in subvalue) == bool('l' in subvalue):
          raise ConfigError(
              'Need only one of \'h\' (sha-1) or \'l\' (link), got: %r' %
              subvalue)
        if bool('h' in subvalue) != bool('s' in subvalue):
          raise ConfigError(
              'Both \'h\' (sha-1) and \'s\' (size) should be set, got: %r' %
              subvalue)
        if bool('s' in subvalue) == bool('l' in subvalue):
          raise ConfigError(
              'Need only one of \'s\' (size) or \'l\' (link), got: %r' %
              subvalue)
        if bool('l' in subvalue) and bool('m' in subvalue):
          raise ConfigError(
              'Cannot use \'m\' (mode) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      # Hashes of other .isolated files to merge in, processed in order.
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_valid_hash(subvalue, algo):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if os_flavor and value != os_flavor:
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (os_flavor, value))

    elif key == 'version':
      # Already checked above.
      pass

    else:
      raise ConfigError('Unknown key %r' % key)

  # Automatically fix os.path.sep if necessary. While .isolated files are always
  # in the the native path format, someone could want to download an .isolated
  # tree from another OS.
  wrong_path_sep = '/' if os.path.sep == '\\' else '\\'
  if 'files' in data:
    data['files'] = dict(
        (k.replace(wrong_path_sep, os.path.sep), v)
        for k, v in data['files'].iteritems())
    for v in data['files'].itervalues():
      if 'l' in v:
        v['l'] = v['l'].replace(wrong_path_sep, os.path.sep)
  if 'relative_cwd' in data:
    data['relative_cwd'] = data['relative_cwd'].replace(
        wrong_path_sep, os.path.sep)
  return data
889
890
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, os_flavor, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, os_flavor, self.algo)
    # Each 'includes' entry becomes a child node to be fetched later.
    self.children = [
        IsolatedFile(include, self.algo)
        for include in self.data.get('includes', [])
    ]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    # Nothing to do until parsed; never do the work twice.
    if not self._is_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        cache.retrieve(WorkerPool.MED, properties['h'], properties['s'])
    self.files_fetched = True
951
952
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command to run, merged from the .isolated tree.
    self.command = []
    # Union of the 'files' maps of the whole tree.
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash, os_flavor, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.
    """
    self.root = IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      # Queues |isolated_file|'s content for retrieval, refusing cycles.
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      cache.retrieve(WorkerPool.HIGH, h, UNKNOWN_FILE_SIZE)

    retrieve(self.root)

    while pending:
      # Block until any pending .isolated file is available, parse it, then
      # queue its own includes.
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      item.load(os_flavor, cache.read(item_hash))
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(cache, self.root)

    def check(n):
      # Sanity check: every node in the tree must have fetched its files.
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    # Defaults for properties no .isolated file in the tree specified.
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False

  def _traverse_tree(self, cache, node):
    # Fetches files of each fetchable node and unlocks at most one new child
    # per pass, preserving the strict left-to-right 'includes' ordering.
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(cache, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(cache, i)

  def _update_self(self, cache, node):
    # Merges |node|'s files and properties into the aggregated Settings;
    # values already set by nodes processed earlier win.
    node.fetch_files(cache, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      # Ensure paths are correctly separated on windows.
      self.command = node.data['command']
      if self.command:
        self.command[0] = self.command[0].replace('/', os.path.sep)
        self.command = tools.fix_python_path(self.command)
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1048
1049
def fetch_isolated(
    isolated_hash, cache, outdir, os_flavor, algo, require_command):
  """Aggressively downloads the .isolated file(s), then download all the files.

  Arguments:
  - isolated_hash: hash of the root .isolated file, or a path to a local file
    (it is then hashed with |algo| and inserted into |cache| first).
  - cache: object implementing set_pool()/add()/retrieve()/wait_for()/read()/
    store_to(), e.g. MemoryCache.
  - outdir: directory where the tree is recreated.
  - os_flavor: expected 'os' value in the .isolated files. Optional.
  - algo: hashlib algorithm class used for content hashes.
  - require_command: raises ConfigError if the tree defines no command.

  Returns the parsed Settings instance.
  """
  settings = Settings()
  with WorkerPool() as pool:
    with cache:
      cache.set_pool(pool)
      with tools.Profiler('GetIsolateds'):
        # Optionally support local files.
        if not is_valid_hash(isolated_hash, algo):
          # Adds it in the cache. While not strictly necessary, this
          # simplifies the rest.
          h = hash_file(isolated_hash, algo)
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash, os_flavor, algo)

      if require_command and not settings.command:
        raise ConfigError('No command to run')

      with tools.Profiler('GetRest'):
        # Create the layout first, so content can be written out as soon as
        # it is retrieved.
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        cwd = os.path.normpath(os.path.join(outdir, settings.relative_cwd))
        if not os.path.isdir(cwd):
          os.makedirs(cwd)

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
          while remaining:
            detector.ping()
            obj = cache.wait_for(remaining)
            for filepath, properties in remaining.pop(obj):
              outfile = os.path.join(outdir, filepath)
              cache.store_to(obj, outfile)
              if 'm' in properties:
                # It's not set on Windows.
                os.chmod(outfile, properties['m'])

            # Throttle progress reporting to once per update interval.
            duration = time.time() - last_update
            if duration > DELAY_BETWEEN_UPDATES_IN_SECS:
              msg = '%d files remaining...' % len(remaining)
              print msg
              logging.info(msg)
              last_update = time.time()
  return settings
1101
1102
def download_isolated_tree(isolated_hash, target_directory, remote):
  """Downloads the dependencies to the given directory."""
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  # Keep everything in memory; file contents are written straight to their
  # final destination instead of transiting through an on-disk cache.
  memory_cache = MemoryCache(remote)
  return fetch_isolated(
      isolated_hash, memory_cache, target_directory, None, remote.algo, False)
maruel@chromium.org4f2ebe42013-09-19 13:09:08 +00001111
1112
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server."""
  options, files = parser.parse_args(args)

  if files == ['-']:
    files = sys.stdin.readlines()

  if not files:
    parser.error('Nothing to upload')

  # Load the necessary metadata.
  # TODO(maruel): Use a worker pool to upload as the hashing is being done.
  infiles = {}
  for filepath in files:
    infiles[filepath] = {
        's': os.stat(filepath).st_size,
        'h': hash_file(filepath, get_hash_algo(options.namespace)),
    }

  with tools.Profiler('Archive'):
    ret = upload_tree(
        base_url=options.isolate_server,
        indir=os.getcwd(),
        infiles=infiles,
        namespace=options.namespace)
  if not ret:
    # On success, print '<hash> <path>' for each file so callers can script it.
    print('\n'.join('%s %s' % (infiles[f]['h'], f) for f in sorted(infiles)))
  return ret
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00001145
1146
def CMDdownload(parser, args):
  """Download data from the server.

  It can either download individual files or a complete tree from a .isolated
  file.
  """
  parser.add_option(
      '-i', '--isolated', metavar='HASH',
      help='hash of an isolated file, .isolated file content is discarded, use '
           '--file if you need it')
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default=os.getcwd(),
      help='destination directory')
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported arguments: %s' % args)
  # Exactly one of the two modes must be selected.
  if bool(options.isolated) == bool(options.file):
    parser.error('Use one of --isolated or --file, and only one.')

  options.target = os.path.abspath(options.target)
  remote = get_storage_api(options.isolate_server, options.namespace)
  # Individual file mode: fetch each requested hash to its destination.
  for h, dest in options.file:
    logging.info('%s: %s', h, dest)
    file_write(
        os.path.join(options.target, dest),
        remote.fetch(h, UNKNOWN_FILE_SIZE))
  # Tree mode: materialize the whole .isolated tree under the target.
  if options.isolated:
    settings = download_isolated_tree(options.isolated, options.target, remote)
    rel = os.path.join(options.target, settings.relative_cwd)
    # |rel| is already absolute; the old code joined it with options.target a
    # second time, a no-op for absolute paths but needlessly confusing.
    print('To run this test please run from the directory %s:' % rel)
    print(' ' + ' '.join(settings.command))
  return 0
1183
1184
class OptionParserIsolateServer(tools.OptionParserWithLogging):
  """Option parser with the common --isolate-server/--namespace flags."""

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(self, **kwargs)
    self.add_option(
        '-I', '--isolate-server',
        metavar='URL', default='',
        help='Isolate server to use')
    self.add_option(
        '--namespace', default='default-gzip',
        help='The namespace to use on the server, default: %default')

  def parse_args(self, *args, **kwargs):
    options, remaining = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    # Normalize the URL and refuse to continue without one.
    options.isolate_server = options.isolate_server.rstrip('/')
    if not options.isolate_server:
      self.error('--isolate-server is required.')
    return options, remaining
1203
1204
def main(args):
  """Dispatches to the CMD* handler matching the first argument."""
  dispatcher = subcommand.CommandDispatcher(__name__)
  try:
    return dispatcher.execute(
        OptionParserIsolateServer(version=__version__), args)
  except (ConfigError, MappingError) as e:
    # Known configuration/mapping errors are reported tersely, no traceback.
    sys.stderr.write('\nError: %s\n' % e)
    return 1
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001215
1216
if __name__ == '__main__':
  fix_encoding.fix_encoding()   # Make stdout/stderr encoding-safe.
  tools.disable_buffering()     # Emit output as soon as it is written.
  colorama.init()               # Enable ANSI color codes on Windows consoles.
  sys.exit(main(sys.argv[1:]))