#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Archives a set of files to a server."""

__version__ = '0.2'

import binascii
import hashlib
import itertools
import json
import logging
import os
import random
import re
import sys
import threading
import time
import urllib
import zlib

from third_party import colorama
from third_party.depot_tools import fix_encoding
from third_party.depot_tools import subcommand

from utils import net
from utils import threading_utils
from utils import tools


# Default server.
# TODO(maruel): Chromium-specific.
ISOLATE_SERVER = 'https://isolateserver-dev.appspot.com/'


# The minimum size of files to upload directly to the blobstore.
MIN_SIZE_FOR_DIRECT_BLOBSTORE = 20 * 1024

# The number of files to check the isolate server for per /contains query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: the larger the file, the
# more likely it has changed). Then the first ITEMS_PER_CONTAINS_QUERIES[0]
# files are taken and sent to '/contains', then the next
# ITEMS_PER_CONTAINS_QUERIES[1], and so on. The numbers here are a trade-off;
# the more files per request, the lower the effect of HTTP round trip latency
# and TCP-level chattiness. On the other hand, larger values cause longer
# lookups, increasing the initial latency to start uploading, which is
# especially an issue for large files. This value is optimized for the "few
# thousand files to look up with a minimal number of large files missing"
# case.
ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]
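# For example, with the list above, looking up 300 files results in batches of
# 20, 20, 50, 50, 50 and 100 files, followed by one final query for the 10
# remaining ones: 7 '/contains' round trips instead of 300.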


# A list of already compressed extension types that should not receive any
# compression before being uploaded.
ALREADY_COMPRESSED_TYPES = [
    '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'pdf', 'png',
    'wav', 'zip'
]

maruel@chromium.orgc6f90062012-11-07 18:32:22 +000062
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000063# The file size to be used when we don't know the correct file size,
64# generally used for .isolated files.
65UNKNOWN_FILE_SIZE = None
66
67
68# The size of each chunk to read when downloading and unzipping files.
69ZIPPED_FILE_CHUNK = 16 * 1024
70
71
maruel@chromium.org8750e4b2013-09-18 02:37:57 +000072# Chunk size to use when doing disk I/O.
73DISK_FILE_CHUNK = 1024 * 1024
74
75
# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout, the whole download is aborted.
DOWNLOAD_READ_TIMEOUT = 60


# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30


class ConfigError(ValueError):
  """Generic failure to load a .isolated file."""
  pass


class MappingError(OSError):
  """Failed to recreate the tree."""
  pass


def randomness():
  """Generates low-entropy randomness for MIME encoding.

  Exists so it can be mocked out in unit tests.
  """
  return str(time.time())


def encode_multipart_formdata(fields, files,
                              mime_mapper=lambda _: 'application/octet-stream'):
  """Encodes a Multipart form data object.

  Args:
    fields: a sequence of (name, value) elements for
        regular form fields.
    files: a sequence of (name, filename, value) elements for data to be
        uploaded as files.
    mime_mapper: function to return the mime type from the filename.
  Returns:
    content_type: for httplib.HTTP instance
    body: for httplib.HTTP instance
  """
  boundary = hashlib.md5(randomness()).hexdigest()
  body_list = []
  for (key, value) in fields:
    if isinstance(key, unicode):
      key = key.encode('utf-8')
    if isinstance(value, unicode):
      value = value.encode('utf-8')
    body_list.append('--' + boundary)
    body_list.append('Content-Disposition: form-data; name="%s"' % key)
    body_list.append('')
    body_list.append(value)
    body_list.append('--' + boundary)
    body_list.append('')
  for (key, filename, value) in files:
    if isinstance(key, unicode):
      key = key.encode('utf-8')
    if isinstance(filename, unicode):
      filename = filename.encode('utf-8')
    if isinstance(value, unicode):
      value = value.encode('utf-8')
    body_list.append('--' + boundary)
    body_list.append('Content-Disposition: form-data; name="%s"; '
                     'filename="%s"' % (key, filename))
    body_list.append('Content-Type: %s' % mime_mapper(filename))
    body_list.append('')
    body_list.append(value)
    body_list.append('--' + boundary)
    body_list.append('')
  if body_list:
    body_list[-2] += '--'
  body = '\r\n'.join(body_list)
  content_type = 'multipart/form-data; boundary=%s' % boundary
  return content_type, body


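# Illustrative sketch of the encoder above (the values are made up; the real
# caller is _upload_hash_content_to_blobstore() further down):
#
#   content_type, body = encode_multipart_formdata(
#       [('token', 'some-token')],
#       [('content', 'deadbeef', '<zipped file body>')])
#   # content_type == 'multipart/form-data; boundary=<md5 hex digest>'
#   # body is the CRLF-delimited payload to POST with that content type.

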
def is_valid_hash(value, algo):
  """Returns True if |value| is a valid hash for the given |algo|."""
  size = 2 * algo().digest_size
  return bool(re.match(r'^[a-fA-F0-9]{%d}$' % size, value))


def hash_file(filepath, algo):
  """Calculates the hash of a file without reading it all in memory at once.

  |algo| should be one of the hashlib hashing algorithms.
  """
  digest = algo()
  with open(filepath, 'rb') as f:
    while True:
      chunk = f.read(DISK_FILE_CHUNK)
      if not chunk:
        break
      digest.update(chunk)
  return digest.hexdigest()


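# Example: hash_file('/tmp/foo', hashlib.sha1) returns the sha-1 hex digest of
# /tmp/foo while reading it in DISK_FILE_CHUNK pieces, so memory usage stays
# constant regardless of the file size.

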
def file_read(filepath, chunk_size=DISK_FILE_CHUNK):
  """Yields file content in chunks of given |chunk_size|."""
  with open(filepath, 'rb') as f:
    while True:
      data = f.read(chunk_size)
      if not data:
        break
      yield data


def file_write(filepath, content_generator):
  """Writes file content as generated by content_generator.

  Creates the intermediate directories as needed.

  Returns the number of bytes written.

  Meant to be mocked out in unit tests.
  """
  filedir = os.path.dirname(filepath)
  if not os.path.isdir(filedir):
    os.makedirs(filedir)
  total = 0
  with open(filepath, 'wb') as f:
    for d in content_generator:
      total += len(d)
      f.write(d)
  return total


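# The two helpers above compose as generators; for example, copying a file
# while counting its bytes is simply (paths illustrative):
#
#   total = file_write('/tmp/dst', file_read('/tmp/src'))

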
def zip_compress(content_generator, level=7):
  """Reads chunks from |content_generator| and yields zip compressed chunks."""
  compressor = zlib.compressobj(level)
  for chunk in content_generator:
    compressed = compressor.compress(chunk)
    if compressed:
      yield compressed
  tail = compressor.flush(zlib.Z_FINISH)
  if tail:
    yield tail


def get_zip_compression_level(filename):
  """Given a filename calculates the ideal zip compression level to use."""
  file_ext = os.path.splitext(filename)[1].lower()
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7


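# A round-trip sketch of the compression helpers above (this mirrors how
# IsolateServer.fetch() below decompresses with zlib.decompressobj()):
#
#   level = get_zip_compression_level('/tmp/src')
#   blob = ''.join(zip_compress(file_read('/tmp/src'), level))
#   data = zlib.decompressobj().decompress(blob)
#   # data == open('/tmp/src', 'rb').read()

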
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files."""
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Compute the full set of directories to create.
  directories = set(os.path.dirname(f) for f in files)
  for item in list(directories):
    while item:
      directories.add(item)
      item = os.path.dirname(item)
  for d in sorted(directories):
    if d:
      os.mkdir(os.path.join(base_directory, d))


def create_links(base_directory, files):
  """Creates any links needed by the given set of files."""
  for filepath, properties in files:
    if 'l' not in properties:
      continue
    if sys.platform == 'win32':
      # TODO(maruel): Create junctions or empty text files similar to what
      # cygwin does?
      logging.warning('Ignoring symlink %s', filepath)
      continue
    outfile = os.path.join(base_directory, filepath)
    # os.symlink() doesn't exist on Windows, so the 'l' (link) property should
    # never be specified for a Windows .isolated file.
    os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    if 'm' in properties:
      lchmod = getattr(os, 'lchmod', None)
      if lchmod:
        lchmod(outfile, properties['m'])


def setup_commands(base_directory, cwd, cmd):
  """Correctly adjusts and then returns the required working directory
  and command needed to run the test.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  cwd = os.path.join(base_directory, cwd)
  if not os.path.isdir(cwd):
    os.makedirs(cwd)

  # Ensure paths are correctly separated on Windows.
  cmd[0] = cmd[0].replace('/', os.path.sep)
  cmd = tools.fix_python_path(cmd)

  return cwd, cmd


def generate_remaining_files(files):
  """Generates a dictionary of all the remaining files to be downloaded."""
  remaining = {}
  for filepath, props in files:
    if 'h' in props:
      remaining.setdefault(props['h'], []).append((filepath, props))

  return remaining


def is_valid_file(filepath, size):
  """Determines if the given file appears valid.

  Currently it just checks the file's size.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  actual_size = os.stat(filepath).st_size
  if size != actual_size:
    logging.warning(
        'Found invalid item %s; %d != %d',
        os.path.basename(filepath), actual_size, size)
    return False
  return True


def try_remove(filepath):
  """Removes a file without crashing even if it doesn't exist."""
  try:
    os.remove(filepath)
  except OSError:
    pass


def url_read(url, **kwargs):
  """Reads the given URL and raises MappingError if the server is down."""
  result = net.url_read(url, **kwargs)
  if result is None:
    # If we get no response from the server, assume it is down and raise an
    # exception.
    raise MappingError('Unable to connect to server %s' % url)
  return result


class StorageApi(object):
  """Base class for objects that can download and upload files."""

  def __init__(self):
    self._pool = None

  def set_pool(self, pool):
    """Sets the WorkerPool that can be used for parallel uploads."""
    self._pool = pool

  def fetch(self, item, expected_size):
    """Fetches an object and yields its content."""
    raise NotImplementedError()

  def push(self, item, expected_size, content_generator, push_urls=None):
    """Uploads content generated by |content_generator| as |item|."""
    raise NotImplementedError()

  def check_missing_files(self, files):
    """Checks for existence of given |files| on the server.

    Arguments:
      files: list of pairs (file name, metadata dict).

    Returns:
      A list of files missing on server as a list of triplets
      (file name, metadata dict, push_urls object to pass to push).
    """
    raise NotImplementedError()

  def get_missing_files(self, files):
    """Yields files that are missing from the server.

    Issues multiple parallel queries via check_missing_files method calls.

    Arguments:
      files: a dictionary file name -> metadata dict.

    Yields:
      Triplets (file name, metadata dict, push_urls object to pass to push).
    """
    # TODO(maruel, vadimsh): Reuse self._pool here.
    with threading_utils.ThreadPool(1, 16, 0, prefix='get_missing_files') as tp:
      for batch in self.batch_files_for_check(files):
        tp.add_task(0, self.check_missing_files, batch)
      for missing in itertools.chain.from_iterable(tp.iter_results()):
        yield missing

  def async_push(self, priority, item, expected_size,
                 content_generator, push_urls=None):
    """Starts an asynchronous push to the server in a parallel thread."""
    # TODO(vadimsh): Implement streaming uploads. Before it's done, assemble
    # the content right here. It will block until the whole content is zipped.
    data = ''.join(content_generator)
    self._pool.add_task(
        priority, self.push, item, expected_size, [data], push_urls)

  @staticmethod
  def batch_files_for_check(files):
    """Splits the list of files to check for existence on the server into
    batches.

    Each batch corresponds to a single 'exists?' query to the server via a call
    to the check_missing_files method.

    Arguments:
      files: a dictionary file name -> metadata dict.

    Yields:
      Batches of files to query for existence in a single operation,
      each batch is a list of pairs: (file name, metadata dict).
    """
    batch_count = 0
    batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[0]
    next_queries = []
    items = ((k, v) for k, v in files.iteritems() if 's' in v)
    for filename, metadata in sorted(items, key=lambda x: -x[1]['s']):
      next_queries.append((filename, metadata))
      if len(next_queries) == batch_size_limit:
        yield next_queries
        next_queries = []
        batch_count += 1
        batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[
            min(batch_count, len(ITEMS_PER_CONTAINS_QUERIES) - 1)]
    if next_queries:
      yield next_queries


class IsolateServer(StorageApi):
  """Client class to download or upload to Isolate Server."""
  def __init__(self, base_url, namespace):
    super(IsolateServer, self).__init__()
    assert base_url.startswith('http'), base_url
    self.content_url = base_url.rstrip('/') + '/content/'
    self.namespace = namespace
    self.algo = get_hash_algo(namespace)
    self._token = None
    self._lock = threading.Lock()

  @property
  def token(self):
    # TODO(maruel): Make this request much earlier asynchronously while the
    # files are being enumerated.
    with self._lock:
      if not self._token:
        self._token = urllib.quote(url_read(self.content_url + 'get_token'))
      return self._token

  def fetch(self, item, expected_size):
    """Fetches an object and yields its content."""
    assert isinstance(item, basestring)
    assert (
        isinstance(expected_size, (int, long)) or
        expected_size == UNKNOWN_FILE_SIZE)
    zipped_url = '%sretrieve/%s/%s' % (self.content_url, self.namespace, item)
    logging.debug('download_file(%s)', zipped_url)

    # Because the app engine DB is only eventually consistent, retry 404 errors
    # because the file might just not be visible yet (even though it has been
    # uploaded).
    connection = net.url_open(
        zipped_url, retry_404=True, read_timeout=DOWNLOAD_READ_TIMEOUT)
    if not connection:
      raise IOError('Unable to open connection to %s' % zipped_url)

    # TODO(maruel): Must only decompress when needed.
    decompressor = zlib.decompressobj()
    try:
      compressed_size = 0
      decompressed_size = 0
      while True:
        chunk = connection.read(ZIPPED_FILE_CHUNK)
        if not chunk:
          break
        compressed_size += len(chunk)
        decompressed = decompressor.decompress(chunk)
        decompressed_size += len(decompressed)
        yield decompressed

      # Ensure that all the data was properly decompressed.
      uncompressed_data = decompressor.flush()
      if uncompressed_data:
        raise IOError('Decompression failed')
      if (expected_size != UNKNOWN_FILE_SIZE and
          decompressed_size != expected_size):
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (item, decompressed_size, expected_size))
    except zlib.error as e:
      msg = 'Corrupted zlib for item %s. Processed %d of %s bytes.\n%s' % (
          item, compressed_size, connection.content_length, e)
      logging.warning(msg)

      # Testing seems to show that if a few machines are trying to download
      # the same blob, they can cause each other to fail. So if we hit a zip
      # error, this is the most likely cause (it only downloads some of the
      # data). Randomly sleep for between 5 and 25 seconds to try and spread
      # out the downloads.
      sleep_duration = (random.random() * 20) + 5
      time.sleep(sleep_duration)
      raise IOError(msg)

  def push(self, item, expected_size, content_generator, push_urls=None):
    """Uploads content generated by |content_generator| as |item| to the remote
    isolate server.
    """
    assert isinstance(item, basestring)
    assert isinstance(expected_size, int) or expected_size == UNKNOWN_FILE_SIZE
    item = str(item)
    # TODO(maruel): Support large files. This would require streaming support.
    content = ''.join(content_generator)
    if len(content) > MIN_SIZE_FOR_DIRECT_BLOBSTORE:
      return self._upload_hash_content_to_blobstore(item, content)

    url = '%sstore/%s/%s?token=%s' % (
        self.content_url, self.namespace, item, self.token)
    return url_read(
        url, data=content, content_type='application/octet-stream')

  def check_missing_files(self, files):
    """Checks for existence of given |files| on the server."""
    logging.info('Checking existence of %d files...', len(files))

    body = ''.join(
        (binascii.unhexlify(metadata['h']) for (_, metadata) in files))
    assert (len(body) % self.algo().digest_size) == 0, repr(body)

    query_url = '%scontains/%s?token=%s' % (
        self.content_url, self.namespace, self.token)
    response = url_read(
        query_url, data=body, content_type='application/octet-stream')
    if len(files) != len(response):
      raise MappingError(
          'Got an incorrect number of responses from the server. Expected %d, '
          'but got %d' % (len(files), len(response)))

    # This implementation of IsolateServer doesn't use the push_urls field, so
    # set it to None.
    missing_files = [
        files[i] + (None,) for i, flag in enumerate(response) if flag == '\x00'
    ]
    logging.info('Queried %d files, %d cache hit',
                 len(files), len(files) - len(missing_files))
    return missing_files

  def _upload_hash_content_to_blobstore(self, item, content):
    """Uploads the content directly to the blobstore via a generated url."""
    # TODO(maruel): Support large files. This would require streaming support.
    gen_url = '%sgenerate_blobstore_url/%s/%s' % (
        self.content_url, self.namespace, item)
    # The token is guaranteed to be already quoted but it is unnecessary here,
    # and only here.
    data = [('token', urllib.unquote(self.token))]
    content_type, body = encode_multipart_formdata(
        data, [('content', item, content)])
    last_url = gen_url
    for _ in net.retry_loop(max_attempts=net.URL_OPEN_MAX_ATTEMPTS):
      # Retry HTTP 50x here but not 404.
      upload_url = net.url_read(gen_url, data=data)
      if not upload_url:
        raise MappingError('Unable to connect to server %s' % gen_url)
      last_url = upload_url

      # Do not retry this request on HTTP 50x. Regenerate an upload url each
      # time since uploading "consumes" the upload url.
      result = net.url_read(
          upload_url, data=body, content_type=content_type, retry_50x=False)
      if result is not None:
        return result
    raise MappingError('Unable to connect to server %s' % last_url)


class FileSystem(StorageApi):
  """Fetches data from the file system.

  The common use case is an NFS/CIFS file server, mounted locally, that is
  used to fetch files onto a local partition.
  """
  def __init__(self, base_path):
    super(FileSystem, self).__init__()
    self.base_path = base_path

  def fetch(self, item, expected_size):
    assert isinstance(item, basestring)
    assert isinstance(expected_size, int) or expected_size == UNKNOWN_FILE_SIZE
    source = os.path.join(self.base_path, item)
    if (expected_size != UNKNOWN_FILE_SIZE and
        not is_valid_file(source, expected_size)):
      raise IOError('Invalid file %s' % item)
    return file_read(source)

  def push(self, item, expected_size, content_generator, push_urls=None):
    assert isinstance(item, basestring)
    assert isinstance(expected_size, int) or expected_size == UNKNOWN_FILE_SIZE
    dest = os.path.join(self.base_path, item)
    total = file_write(dest, content_generator)
    if expected_size != UNKNOWN_FILE_SIZE and total != expected_size:
      os.remove(dest)
      raise IOError(
          'Invalid file %s, %d != %d' % (item, total, expected_size))

  def check_missing_files(self, files):
    return [
        (filename, metadata, None)
        for filename, metadata in files
        if not os.path.exists(os.path.join(self.base_path, metadata['h']))
    ]


def get_hash_algo(_namespace):
  """Returns the hash algorithm class to use for the given |namespace|."""
  # TODO(vadimsh): Implement this at some point.
  return hashlib.sha1


def get_storage_api(file_or_url, namespace):
  """Returns an object that implements .fetch() and .push()."""
  if re.match(r'^https?://.+$', file_or_url):
    return IsolateServer(file_or_url, namespace)
  else:
    return FileSystem(file_or_url)


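# Both branches return objects exposing the same fetch()/push() interface,
# e.g. (the local path is hypothetical):
#
#   remote = get_storage_api('https://isolateserver-dev.appspot.com',
#                            'default-gzip')
#   local = get_storage_api('/mnt/isolate', 'default-gzip')

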
class WorkerPool(threading_utils.AutoRetryThreadPool):
  """Thread pool that automatically retries on IOError and runs a preconfigured
  function.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  RETRIES = 5

  def __init__(self):
    super(WorkerPool, self).__init__(
        [IOError],
        self.RETRIES,
        self.INITIAL_WORKERS,
        self.MAX_WORKERS,
        0,
        'remote')


def upload_tree(base_url, indir, infiles, namespace):
  """Uploads the given tree to the given url.

  Arguments:
    base_url: The base url, it is assumed that |base_url|/has/ can be used to
        query if an element was already uploaded, and |base_url|/store/
        can be used to upload a new element.
    indir: Root directory the infiles are based in.
    infiles: dict of files to upload from |indir| to |base_url|.
    namespace: The namespace to use on the server.
  """
  logging.info('upload tree(base_url=%s, indir=%s, files=%d)' %
               (base_url, indir, len(infiles)))

  # Create a pool of workers to zip and upload any files missing from
  # the server.
  cpus = max(threading_utils.num_processors(), 2)
  uploaded = []

  with WorkerPool() as upload_pool:
    remote = get_storage_api(base_url, namespace)
    remote.set_pool(upload_pool)

    def zip_and_trigger_push(filename, metadata, push_urls):
      """Reads the file, zips it and triggers a push to the storage."""
      # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
      path = os.path.join(indir, filename)
      content_generator = zip_compress(file_read(path, ZIPPED_FILE_CHUNK),
                                       get_zip_compression_level(path))
      if metadata.get('priority', '1') == '0':
        priority = WorkerPool.HIGH
      else:
        priority = WorkerPool.MED
      return remote.async_push(
          priority, metadata['h'], UNKNOWN_FILE_SIZE,
          content_generator, push_urls)

    with threading_utils.ThreadPool(2, cpus, 0, 'zip') as zip_pool:
      for filename, metadata, push_urls in remote.get_missing_files(infiles):
        zip_pool.add_task(0, zip_and_trigger_push,
                          filename, metadata, push_urls)
        uploaded.append((filename, metadata))
      logging.info('Waiting for all files to finish zipping')
      zip_pool.join()

    logging.info('All files zipped.')
    upload_pool.join()

  logging.info('All files are uploaded')

  total = len(infiles)
  total_size = sum(metadata.get('s', 0) for metadata in infiles.itervalues())
  logging.info(
      'Total: %6d, %9.1fkb',
      total,
      sum(m.get('s', 0) for m in infiles.itervalues()) / 1024.)
  cache_hit = set(infiles.iterkeys()) - set(x[0] for x in uploaded)
  cache_hit_size = sum(infiles[i].get('s', 0) for i in cache_hit)
  logging.info(
      'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
      len(cache_hit),
      cache_hit_size / 1024.,
      len(cache_hit) * 100. / total,
      cache_hit_size * 100. / total_size if total_size else 0)
  cache_miss = uploaded
  cache_miss_size = sum(infiles[i[0]].get('s', 0) for i in cache_miss)
  logging.info(
      'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
      len(cache_miss),
      cache_miss_size / 1024.,
      len(cache_miss) * 100. / total,
      cache_miss_size * 100. / total_size if total_size else 0)
  return 0


class MemoryCache(object):
  """This class is intended to be usable everywhere the Cache class is.

  Instead of downloading to a cache, all files are kept in memory to be stored
  in the target directory directly.
  """

  def __init__(self, target_directory, pool, remote):
    self.target_directory = target_directory
    self.pool = pool
    self.remote = remote
    self._lock = threading.Lock()
    self._contents = {}

  def retrieve(self, priority, item, size):
    """Gets the requested file."""
    self.pool.add_task(priority, self._store, item, size)

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    with self._lock:
      # Flush items already present.
      for item in items:
        if item in self._contents:
          return item

    while True:
      downloaded = self.pool.get_one_result()
      if downloaded in items:
        return downloaded

  def path(self, item):
    return os.path.join(self.target_directory, item)

  def read(self, item):
    return self._contents[item]

  def _store(self, item, size):
    data = ''.join(self.remote.fetch(item, size))
    with self._lock:
      self._contents[item] = data
    return item

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    return False


def load_isolated(content, os_flavor, algo):
  """Verifies the .isolated file is valid and loads this object with the json
  data.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_valid_hash(subsubvalue, algo):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        if bool('h' in subvalue) and bool('l' in subvalue):
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_valid_hash(subvalue, algo):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if os_flavor and value != os_flavor:
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (os_flavor, value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data


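# A minimal document accepted by load_isolated() looks like the following,
# where each 'h' value is a full sha-1 hex digest (shortened here for
# readability), 's' a size in bytes and 'm' a file mode:
#
#   {
#     "command": ["python", "run_test.py"],
#     "files": {
#       "run_test.py": {"h": "<sha-1>", "s": 3074, "m": 416},
#       "data/input": {"l": "../shared/input"}
#     },
#     "includes": ["<sha-1 of another .isolated>"],
#     "read_only": false,
#     "relative_cwd": "."
#   }

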
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""
  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo
    # Set once the whole left side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with the
    # 'includes' key. The order of each sha-1 in 'includes', each representing
    # a .isolated file in the hash table, is important, as the later ones are
    # not processed until the earlier ones are retrieved and read.
    self.can_fetch = False

    # Raw data.
    self.data = {}
    # An IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, None, self.algo)
    self.children = [
        IsolatedFile(i, self.algo) for i in self.data.get('includes', [])
    ]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in the |files| dictionary.

    Preemptively requests files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if not self._is_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # The root .isolated has priority on the files being mapped. In
      # particular, overridden files must not be fetched.
      if filepath not in files:
        files[filepath] = properties
        if 'h' in properties:
          # Preemptively request files.
          logging.debug('fetching %s' % filepath)
          cache.retrieve(
              WorkerPool.MED,
              properties['h'],
              properties['s'])
    self.files_fetched = True


class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, an IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important
    so that a file in an included .isolated file that is overridden by an
    embedding .isolated file is not fetched needlessly. The includes are
    fetched in one pass and the files are fetched as soon as all the ones on
    the left-side of the tree have been fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for
    both deep and wide trees. A deep one is a long chain of .isolated files
    referenced one at a time by one item in 'includes'. A wide one has a large
    number of 'includes' in a single .isolated file. 'left' is defined as an
    included .isolated file earlier in the 'includes' list. So the order of
    the elements in 'includes' is important.
    """
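    # Illustration: a "deep" tree is A -> [B] -> [C], each .isolated including
    # the next one; a "wide" tree is A -> [B, C, D], one .isolated including
    # many. In both cases A is loaded first, its includes are fetched at HIGH
    # priority, and files are only fetched once everything to their left has
    # been parsed.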
914 self.root = IsolatedFile(root_isolated_hash, algo)
915
916 # Isolated files being retrieved now: hash -> IsolatedFile instance.
917 pending = {}
918 # Set of hashes of already retrieved items to refuse recursive includes.
919 seen = set()
920
921 def retrieve(isolated_file):
922 h = isolated_file.obj_hash
923 if h in seen:
924 raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
925 assert h not in pending
926 seen.add(h)
927 pending[h] = isolated_file
928 cache.retrieve(WorkerPool.HIGH, h, UNKNOWN_FILE_SIZE)
929
930 retrieve(self.root)
931
932 while pending:
933 item_hash = cache.wait_for(pending)
934 item = pending.pop(item_hash)
935 item.load(cache.read(item_hash))
936 if item_hash == root_isolated_hash:
937 # It's the root item.
938 item.can_fetch = True
939
940 for new_child in item.children:
941 retrieve(new_child)
942
943 # Traverse the whole tree to see if files can now be fetched.
944 self._traverse_tree(cache, self.root)
945
946 def check(n):
947 return all(check(x) for x in n.children) and n.files_fetched
948 assert check(self.root)
949
950 self.relative_cwd = self.relative_cwd or ''
951 self.read_only = self.read_only or False
952
953 def _traverse_tree(self, cache, node):
954 if node.can_fetch:
955 if not node.files_fetched:
956 self._update_self(cache, node)
957 will_break = False
958 for i in node.children:
959 if not i.can_fetch:
960 if will_break:
961 break
962 # Automatically mark the first one as fetcheable.
963 i.can_fetch = True
964 will_break = True
965 self._traverse_tree(cache, i)
966
967 def _update_self(self, cache, node):
968 node.fetch_files(cache, self.files)
969 # Grabs properties.
970 if not self.command and node.data.get('command'):
971 self.command = node.data['command']
972 if self.read_only is None and node.data.get('read_only') is not None:
973 self.read_only = node.data['read_only']
974 if (self.relative_cwd is None and
975 node.data.get('relative_cwd') is not None):
976 self.relative_cwd = node.data['relative_cwd']
977
978
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server."""
  options, files = parser.parse_args(args)

  if files == ['-']:
    files = sys.stdin.readlines()

  if not files:
    parser.error('Nothing to upload')
  if not options.isolate_server:
    parser.error('Nowhere to send. Please specify --isolate-server')

  # Load the necessary metadata. This is going to be rewritten eventually to be
  # more efficient.
  algo = hashlib.sha1
  infiles = dict(
      (
        f,
        {
          's': os.stat(f).st_size,
          'h': hash_file(f, algo),
        }
      )
      for f in files)

  with tools.Profiler('Archive'):
    ret = upload_tree(
        base_url=options.isolate_server,
        indir=os.getcwd(),
        infiles=infiles,
        namespace=options.namespace)
  if not ret:
    print '\n'.join('%s %s' % (infiles[f]['h'], f) for f in sorted(infiles))
  return ret


def CMDdownload(parser, args):
  """Downloads data from the server.

  It can download individual files.
  """
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default=os.getcwd(),
      help='destination directory')
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported arguments: %s' % args)
  if not options.file:
    parser.error('At least one --file is required.')

  options.target = os.path.abspath(options.target)
  remote = get_storage_api(options.isolate_server, options.namespace)
  for h, dest in options.file:
    logging.info('%s: %s', h, dest)
    file_write(
        os.path.join(options.target, dest),
        remote.fetch(h, UNKNOWN_FILE_SIZE))
  return 0


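# Example invocations of the two subcommands above, assuming this file is
# saved as isolate_storage.py (hashes and paths are illustrative):
#
#   python isolate_storage.py archive \
#       -I https://isolateserver-dev.appspot.com foo.txt
#   python isolate_storage.py download \
#       -I https://isolateserver-dev.appspot.com \
#       -f 8b1a9953c4611296a827abf8c47804d7e6c49c04 foo.txt -t /tmp/out

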
class OptionParserIsolateServer(tools.OptionParserWithLogging):
  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(self, **kwargs)
    self.add_option(
        '-I', '--isolate-server',
        default=ISOLATE_SERVER,
        metavar='URL',
        help='Isolate server where data is stored. default: %default')
    self.add_option(
        '--namespace', default='default-gzip',
        help='The namespace to use on the server, default: %default')

  def parse_args(self, *args, **kwargs):
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    options.isolate_server = options.isolate_server.rstrip('/')
    if not options.isolate_server:
      self.error('--isolate-server is required.')
    return options, args


def main(args):
  dispatcher = subcommand.CommandDispatcher(__name__)
  try:
    return dispatcher.execute(
        OptionParserIsolateServer(version=__version__), args)
  except (ConfigError, MappingError) as e:
    sys.stderr.write('\nError: ')
    sys.stderr.write(str(e))
    sys.stderr.write('\n')
    return 1


if __name__ == '__main__':
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))