blob: 0f7c5ff613ec46702a02051f880e284c77452828 [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002# Copyright 2013 The Chromium Authors. All rights reserved.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00003# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Archives a set of files to a server."""
7
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +00008__version__ = '0.2'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000010import binascii
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +000011import cStringIO
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000012import hashlib
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +000013import itertools
maruel@chromium.org41601642013-09-18 19:40:46 +000014import json
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000015import logging
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000016import os
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000017import random
18import re
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000019import sys
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000020import threading
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000021import time
maruel@chromium.orge82112e2013-04-24 14:41:55 +000022import urllib
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000023import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000024
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000025from third_party import colorama
26from third_party.depot_tools import fix_encoding
27from third_party.depot_tools import subcommand
28
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000029from utils import net
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000030from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000031from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000032
33
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000034# Default server.
35# TODO(maruel): Chromium-specific.
36ISOLATE_SERVER = 'https://isolateserver-dev.appspot.com/'
37
38
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000039# The minimum size of files to upload directly to the blobstore.
maruel@chromium.orgaef29f82012-12-12 15:00:42 +000040MIN_SIZE_FOR_DIRECT_BLOBSTORE = 20 * 1024
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000041
vadimsh@chromium.orgeea52422013-08-21 19:35:54 +000042# The number of files to check the isolate server per /contains query.
43# All files are sorted by likelihood of a change in the file content
44# (currently file size is used to estimate this: larger the file -> larger the
45# possibility it has changed). Then first ITEMS_PER_CONTAINS_QUERIES[0] files
46# are taken and send to '/contains', then next ITEMS_PER_CONTAINS_QUERIES[1],
47# and so on. Numbers here is a trade-off; the more per request, the lower the
48# effect of HTTP round trip latency and TCP-level chattiness. On the other hand,
49# larger values cause longer lookups, increasing the initial latency to start
50# uploading, which is especially an issue for large files. This value is
51# optimized for the "few thousands files to look up with minimal number of large
52# files missing" case.
53ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]
csharp@chromium.org07fa7592013-01-11 18:19:30 +000054
maruel@chromium.org9958e4a2013-09-17 00:01:48 +000055
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000056# A list of already compressed extension types that should not receive any
57# compression before being uploaded.
58ALREADY_COMPRESSED_TYPES = [
59 '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'pdf', 'png',
60 'wav', 'zip'
61]
62
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000063
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000064# The file size to be used when we don't know the correct file size,
65# generally used for .isolated files.
66UNKNOWN_FILE_SIZE = None
67
68
69# The size of each chunk to read when downloading and unzipping files.
70ZIPPED_FILE_CHUNK = 16 * 1024
71
72
maruel@chromium.org8750e4b2013-09-18 02:37:57 +000073# Chunk size to use when doing disk I/O.
74DISK_FILE_CHUNK = 1024 * 1024
75
76
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +000077# Read timeout in seconds for downloads from isolate storage. If there's no
78# response from the server within this timeout whole download will be aborted.
79DOWNLOAD_READ_TIMEOUT = 60
80
81
maruel@chromium.org41601642013-09-18 19:40:46 +000082# The delay (in seconds) to wait between logging statements when retrieving
83# the required files. This is intended to let the user (or buildbot) know that
84# the program is still running.
85DELAY_BETWEEN_UPDATES_IN_SECS = 30
86
87
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000088class ConfigError(ValueError):
89 """Generic failure to load a .isolated file."""
90 pass
91
92
class MappingError(OSError):
  """Failed to recreate the tree."""
96
97
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +000098def randomness():
99 """Generates low-entropy randomness for MIME encoding.
100
101 Exists so it can be mocked out in unit tests.
102 """
103 return str(time.time())
104
105
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000106def encode_multipart_formdata(fields, files,
107 mime_mapper=lambda _: 'application/octet-stream'):
108 """Encodes a Multipart form data object.
109
110 Args:
111 fields: a sequence (name, value) elements for
112 regular form fields.
113 files: a sequence of (name, filename, value) elements for data to be
114 uploaded as files.
115 mime_mapper: function to return the mime type from the filename.
116 Returns:
117 content_type: for httplib.HTTP instance
118 body: for httplib.HTTP instance
119 """
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +0000120 boundary = hashlib.md5(randomness()).hexdigest()
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000121 body_list = []
122 for (key, value) in fields:
123 if isinstance(key, unicode):
124 value = key.encode('utf-8')
125 if isinstance(value, unicode):
126 value = value.encode('utf-8')
127 body_list.append('--' + boundary)
128 body_list.append('Content-Disposition: form-data; name="%s"' % key)
129 body_list.append('')
130 body_list.append(value)
131 body_list.append('--' + boundary)
132 body_list.append('')
133 for (key, filename, value) in files:
134 if isinstance(key, unicode):
135 value = key.encode('utf-8')
136 if isinstance(filename, unicode):
137 value = filename.encode('utf-8')
138 if isinstance(value, unicode):
139 value = value.encode('utf-8')
140 body_list.append('--' + boundary)
141 body_list.append('Content-Disposition: form-data; name="%s"; '
142 'filename="%s"' % (key, filename))
143 body_list.append('Content-Type: %s' % mime_mapper(filename))
144 body_list.append('')
145 body_list.append(value)
146 body_list.append('--' + boundary)
147 body_list.append('')
148 if body_list:
149 body_list[-2] += '--'
150 body = '\r\n'.join(body_list)
151 content_type = 'multipart/form-data; boundary=%s' % boundary
152 return content_type, body
153
154
maruel@chromium.org7b844a62013-09-17 13:04:59 +0000155def is_valid_hash(value, algo):
156 """Returns if the value is a valid hash for the corresponding algorithm."""
157 size = 2 * algo().digest_size
158 return bool(re.match(r'^[a-fA-F0-9]{%d}$' % size, value))
159
160
def hash_file(filepath, algo):
  """Calculates the hash of a file without reading it all in memory at once.

  |algo| should be one of hashlib hashing algorithm.
  """
  digest = algo()
  with open(filepath, 'rb') as f:
    chunk = f.read(DISK_FILE_CHUNK)
    while chunk:
      digest.update(chunk)
      chunk = f.read(DISK_FILE_CHUNK)
  return digest.hexdigest()
174
175
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000176def file_read(filepath):
177 """Yields file content."""
178 with open(filepath, 'rb') as f:
179 while True:
180 data = f.read(DISK_FILE_CHUNK)
181 if not data:
182 break
183 yield data
184
185
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000186def file_write(filepath, content_generator):
187 """Writes file content as generated by content_generator.
188
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000189 Creates the intermediary directory as needed.
190
191 Returns the number of bytes written.
192
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000193 Meant to be mocked out in unit tests.
194 """
195 filedir = os.path.dirname(filepath)
196 if not os.path.isdir(filedir):
197 os.makedirs(filedir)
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000198 total = 0
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000199 with open(filepath, 'wb') as f:
200 for d in content_generator:
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000201 total += len(d)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000202 f.write(d)
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000203 return total
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000204
205
maruel@chromium.orgaf254852013-09-17 17:48:14 +0000206def create_directories(base_directory, files):
207 """Creates the directory structure needed by the given list of files."""
208 logging.debug('create_directories(%s, %d)', base_directory, len(files))
209 # Creates the tree of directories to create.
210 directories = set(os.path.dirname(f) for f in files)
211 for item in list(directories):
212 while item:
213 directories.add(item)
214 item = os.path.dirname(item)
215 for d in sorted(directories):
216 if d:
217 os.mkdir(os.path.join(base_directory, d))
218
219
def create_links(base_directory, files):
  """Creates any links needed by the given set of files."""
  for filepath, properties in files:
    # Only entries carrying the 'l' (link) property are processed.
    if 'l' not in properties:
      continue
    if sys.platform == 'win32':
      # TODO(maruel): Create junctions or empty text files similar to what
      # cygwin do?
      logging.warning('Ignoring symlink %s', filepath)
      continue
    outfile = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'link' property should
    # never be specified for windows .isolated file.
    os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    if 'm' in properties:
      # lchmod is only available on some platforms (e.g. not Linux).
      lchmod = getattr(os, 'lchmod', None)
      if lchmod:
        lchmod(outfile, properties['m'])
238
239
def setup_commands(base_directory, cwd, cmd):
  """Correctly adjusts and then returns the required working directory
  and command needed to run the test.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  run_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(run_dir):
    os.makedirs(run_dir)

  # Ensure paths are correctly separated on windows.
  # Note: mutates the caller's list in place, as before.
  cmd[0] = cmd[0].replace('/', os.path.sep)
  return run_dir, tools.fix_python_path(cmd)
254
255
def generate_remaining_files(files):
  """Generates a dictionary of all the remaining files to be downloaded."""
  remaining = {}
  for filepath, props in files:
    # Only entries with a hash ('h') can be downloaded; group by hash so
    # identical content is fetched once.
    if 'h' not in props:
      continue
    remaining.setdefault(props['h'], []).append((filepath, props))
  return remaining
264
265
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000266def is_valid_file(filepath, size):
maruel@chromium.orgdedbf492013-09-12 20:42:11 +0000267 """Determines if the given files appears valid.
268
269 Currently it just checks the file's size.
270 """
271 if size == UNKNOWN_FILE_SIZE:
272 return True
273 actual_size = os.stat(filepath).st_size
274 if size != actual_size:
275 logging.warning(
276 'Found invalid item %s; %d != %d',
277 os.path.basename(filepath), actual_size, size)
278 return False
279 return True
280
281
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000282def try_remove(filepath):
283 """Removes a file without crashing even if it doesn't exist."""
284 try:
285 os.remove(filepath)
286 except OSError:
287 pass
288
289
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000290def url_read(url, **kwargs):
vadimsh@chromium.org6b706212013-08-28 15:03:46 +0000291 result = net.url_read(url, **kwargs)
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000292 if result is None:
maruel@chromium.orgef333122013-03-12 20:36:40 +0000293 # If we get no response from the server, assume it is down and raise an
294 # exception.
maruel@chromium.orgdedbf492013-09-12 20:42:11 +0000295 raise MappingError('Unable to connect to server %s' % url)
maruel@chromium.orgef333122013-03-12 20:36:40 +0000296 return result
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000297
298
maruel@chromium.org3e42ce82013-09-12 18:36:59 +0000299class IsolateServer(object):
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000300 """Client class to download or upload to Isolate Server."""
maruel@chromium.org3e42ce82013-09-12 18:36:59 +0000301 def __init__(self, base_url, namespace):
302 assert base_url.startswith('http'), base_url
303 self.content_url = base_url.rstrip('/') + '/content/'
304 self.namespace = namespace
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000305 self._token = None
306 self._lock = threading.Lock()
307
308 @property
309 def token(self):
maruel@chromium.org3e42ce82013-09-12 18:36:59 +0000310 # TODO(maruel): Make this request much earlier asynchronously while the
311 # files are being enumerated.
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000312 with self._lock:
313 if not self._token:
314 self._token = urllib.quote(url_read(self.content_url + 'get_token'))
315 return self._token
316
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000317 def fetch(self, item, expected_size):
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000318 """Fetches an object and yields its content."""
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000319 assert isinstance(item, basestring)
320 assert (
321 isinstance(expected_size, (int, long)) or
322 expected_size == UNKNOWN_FILE_SIZE)
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000323 zipped_url = '%sretrieve/%s/%s' % (self.content_url, self.namespace, item)
324 logging.debug('download_file(%s)', zipped_url)
325
326 # Because the app engine DB is only eventually consistent, retry 404 errors
327 # because the file might just not be visible yet (even though it has been
328 # uploaded).
329 connection = net.url_open(
330 zipped_url, retry_404=True, read_timeout=DOWNLOAD_READ_TIMEOUT)
331 if not connection:
332 raise IOError('Unable to open connection to %s' % zipped_url)
333
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000334 # TODO(maruel): Must only decompress when needed.
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000335 decompressor = zlib.decompressobj()
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000336 try:
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000337 compressed_size = 0
338 decompressed_size = 0
339 while True:
340 chunk = connection.read(ZIPPED_FILE_CHUNK)
341 if not chunk:
342 break
343 compressed_size += len(chunk)
344 decompressed = decompressor.decompress(chunk)
345 decompressed_size += len(decompressed)
346 yield decompressed
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000347
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000348 # Ensure that all the data was properly decompressed.
349 uncompressed_data = decompressor.flush()
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000350 if uncompressed_data:
351 raise IOError('Decompression failed')
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000352 if (expected_size != UNKNOWN_FILE_SIZE and
353 decompressed_size != expected_size):
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000354 raise IOError('File incorrect size after download of %s. Got %s and '
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000355 'expected %s' % (item, decompressed_size, expected_size))
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000356 except zlib.error as e:
357 msg = 'Corrupted zlib for item %s. Processed %d of %s bytes.\n%s' % (
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000358 item, compressed_size, connection.content_length, e)
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000359 logging.warning(msg)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000360
361 # Testing seems to show that if a few machines are trying to download
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000362 # the same blob, they can cause each other to fail. So if we hit a zip
363 # error, this is the most likely cause (it only downloads some of the
364 # data). Randomly sleep for between 5 and 25 seconds to try and spread
365 # out the downloads.
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000366 sleep_duration = (random.random() * 20) + 5
367 time.sleep(sleep_duration)
368 raise IOError(msg)
maruel@chromium.orgc2bfef42013-08-30 21:46:26 +0000369
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000370 def push(self, item, expected_size, content_generator):
371 """Uploads content generated by |content_generator| as |item| to the remote
372 isolate server.
373 """
374 assert isinstance(item, basestring)
375 assert isinstance(expected_size, int) or expected_size == UNKNOWN_FILE_SIZE
376 item = str(item)
377 # TODO(maruel): Support large files. This would require streaming support.
378 content = ''.join(content_generator)
maruel@chromium.org3e42ce82013-09-12 18:36:59 +0000379 if len(content) > MIN_SIZE_FOR_DIRECT_BLOBSTORE:
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000380 return self._upload_hash_content_to_blobstore(item, content)
maruel@chromium.orgd1e20c92013-09-17 20:54:26 +0000381
382 url = '%sstore/%s/%s?token=%s' % (
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000383 self.content_url, self.namespace, item, self.token)
maruel@chromium.orgd1e20c92013-09-17 20:54:26 +0000384 return url_read(
385 url, data=content, content_type='application/octet-stream')
386
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000387 def _upload_hash_content_to_blobstore(self, item, content):
maruel@chromium.orgd1e20c92013-09-17 20:54:26 +0000388 """Uploads the content directly to the blobstore via a generated url."""
389 # TODO(maruel): Support large files. This would require streaming support.
390 gen_url = '%sgenerate_blobstore_url/%s/%s' % (
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000391 self.content_url, self.namespace, item)
maruel@chromium.orgd1e20c92013-09-17 20:54:26 +0000392 # Token is guaranteed to be already quoted but it is unnecessary here, and
393 # only here.
394 data = [('token', urllib.unquote(self.token))]
395 content_type, body = encode_multipart_formdata(
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000396 data, [('content', item, content)])
maruel@chromium.orgd1e20c92013-09-17 20:54:26 +0000397 last_url = gen_url
398 for _ in net.retry_loop(max_attempts=net.URL_OPEN_MAX_ATTEMPTS):
399 # Retry HTTP 50x here but not 404.
400 upload_url = net.url_read(gen_url, data=data)
401 if not upload_url:
402 raise MappingError('Unable to connect to server %s' % gen_url)
403 last_url = upload_url
404
405 # Do not retry this request on HTTP 50x. Regenerate an upload url each
406 # time since uploading "consumes" the upload url.
407 result = net.url_read(
408 upload_url, data=body, content_type=content_type, retry_50x=False)
409 if result is not None:
410 return result
411 raise MappingError('Unable to connect to server %s' % last_url)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000412
413
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000414def check_files_exist_on_server(query_url, queries):
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000415 """Queries the server to see which files from this batch already exist there.
416
417 Arguments:
418 queries: The hash files to potential upload to the server.
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000419 Returns:
420 missing_files: list of files that are missing on the server.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000421 """
maruel@chromium.org3e42ce82013-09-12 18:36:59 +0000422 # TODO(maruel): Move inside IsolateServer.
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000423 logging.info('Checking existence of %d files...', len(queries))
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000424 body = ''.join(
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000425 (binascii.unhexlify(meta_data['h']) for (_, meta_data) in queries))
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000426 assert (len(body) % 20) == 0, repr(body)
427
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000428 response = url_read(
429 query_url, data=body, content_type='application/octet-stream')
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000430 if len(queries) != len(response):
maruel@chromium.orgdedbf492013-09-12 20:42:11 +0000431 raise MappingError(
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000432 'Got an incorrect number of responses from the server. Expected %d, '
433 'but got %d' % (len(queries), len(response)))
434
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000435 missing_files = [
436 queries[i] for i, flag in enumerate(response) if flag == chr(0)
437 ]
438 logging.info('Queried %d files, %d cache hit',
439 len(queries), len(queries) - len(missing_files))
440 return missing_files
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000441
442
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000443class FileSystem(object):
444 """Fetches data from the file system.
445
446 The common use case is a NFS/CIFS file server that is mounted locally that is
447 used to fetch the file on a local partition.
448 """
449 def __init__(self, base_path):
450 self.base_path = base_path
451
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000452 def fetch(self, item, expected_size):
453 assert isinstance(item, basestring)
454 assert isinstance(expected_size, int) or expected_size == UNKNOWN_FILE_SIZE
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000455 source = os.path.join(self.base_path, item)
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000456 if (expected_size != UNKNOWN_FILE_SIZE and
457 not is_valid_file(source, expected_size)):
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000458 raise IOError('Invalid file %s' % item)
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000459 return file_read(source)
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000460
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000461 def push(self, item, expected_size, content_generator):
462 assert isinstance(item, basestring)
463 assert isinstance(expected_size, int) or expected_size == UNKNOWN_FILE_SIZE
464 dest = os.path.join(self.base_path, item)
465 total = file_write(dest, content_generator)
466 if expected_size != UNKNOWN_FILE_SIZE and total != expected_size:
467 os.remove(dest)
maruel@chromium.orge45728d2013-09-16 23:23:22 +0000468 raise IOError(
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000469 'Invalid file %s, %d != %d' % (item, total, expected_size))
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000470
471
def get_storage_api(file_or_url, namespace):
  """Returns an object that implements .fetch() and .push()."""
  # Anything that looks like an http(s) URL talks to the Isolate Server;
  # everything else is treated as a local/mounted directory.
  if re.match(r'^https?://.+$', file_or_url):
    return IsolateServer(file_or_url, namespace)
  return FileSystem(file_or_url)
478
479
maruel@chromium.org781ccf62013-09-17 19:39:47 +0000480class WorkerPool(threading_utils.AutoRetryThreadPool):
481 """Thread pool that automatically retries on IOError and runs a preconfigured
482 function.
maruel@chromium.orgdedbf492013-09-12 20:42:11 +0000483 """
484 # Initial and maximum number of worker threads.
485 INITIAL_WORKERS = 2
486 MAX_WORKERS = 16
maruel@chromium.orgdedbf492013-09-12 20:42:11 +0000487 RETRIES = 5
488
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000489 def __init__(self):
maruel@chromium.org781ccf62013-09-17 19:39:47 +0000490 super(WorkerPool, self).__init__(
491 [IOError],
492 self.RETRIES,
493 self.INITIAL_WORKERS,
494 self.MAX_WORKERS,
495 0,
496 'remote')
maruel@chromium.orgdedbf492013-09-12 20:42:11 +0000497
maruel@chromium.orgdedbf492013-09-12 20:42:11 +0000498
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000499def compression_level(filename):
500 """Given a filename calculates the ideal compression level to use."""
501 file_ext = os.path.splitext(filename)[1].lower()
502 # TODO(csharp): Profile to find what compression level works best.
503 return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
504
505
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +0000506def read_and_compress(filepath, level):
507 """Reads a file and returns its content gzip compressed."""
508 compressor = zlib.compressobj(level)
509 compressed_data = cStringIO.StringIO()
510 with open(filepath, 'rb') as f:
511 while True:
maruel@chromium.orgdedbf492013-09-12 20:42:11 +0000512 chunk = f.read(ZIPPED_FILE_CHUNK)
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000513 if not chunk:
514 break
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +0000515 compressed_data.write(compressor.compress(chunk))
516 compressed_data.write(compressor.flush(zlib.Z_FINISH))
517 value = compressed_data.getvalue()
518 compressed_data.close()
519 return value
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000520
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +0000521
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000522def zip_and_trigger_upload(infile, metadata, add_item):
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +0000523 # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
524 # if not metadata['T']:
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000525 # TODO(maruel): Use a generator?
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +0000526 compressed_data = read_and_compress(infile, compression_level(infile))
527 priority = (
maruel@chromium.org781ccf62013-09-17 19:39:47 +0000528 WorkerPool.HIGH if metadata.get('priority', '1') == '0'
529 else WorkerPool.MED)
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000530 return add_item(priority, metadata['h'], [compressed_data])
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000531
532
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000533def batch_files_for_check(infiles):
534 """Splits list of files to check for existence on the server into batches.
maruel@chromium.org35fc0c82013-01-17 15:14:14 +0000535
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000536 Each batch corresponds to a single 'exists?' query to the server.
537
538 Yields:
539 batches: list of batches, each batch is a list of files.
maruel@chromium.org35fc0c82013-01-17 15:14:14 +0000540 """
vadimsh@chromium.orgeea52422013-08-21 19:35:54 +0000541 batch_count = 0
542 batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[0]
maruel@chromium.org35fc0c82013-01-17 15:14:14 +0000543 next_queries = []
csharp@chromium.org90c45812013-01-23 14:27:21 +0000544 items = ((k, v) for k, v in infiles.iteritems() if 's' in v)
545 for relfile, metadata in sorted(items, key=lambda x: -x[1]['s']):
maruel@chromium.org35fc0c82013-01-17 15:14:14 +0000546 next_queries.append((relfile, metadata))
vadimsh@chromium.orgeea52422013-08-21 19:35:54 +0000547 if len(next_queries) == batch_size_limit:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000548 yield next_queries
maruel@chromium.org35fc0c82013-01-17 15:14:14 +0000549 next_queries = []
vadimsh@chromium.orgeea52422013-08-21 19:35:54 +0000550 batch_count += 1
551 batch_size_limit = ITEMS_PER_CONTAINS_QUERIES[
552 min(batch_count, len(ITEMS_PER_CONTAINS_QUERIES) - 1)]
maruel@chromium.org35fc0c82013-01-17 15:14:14 +0000553 if next_queries:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000554 yield next_queries
555
556
def get_files_to_upload(contains_hash_url, infiles):
  """Yields files that are missing on the server."""
  with threading_utils.ThreadPool(1, 16, 0, prefix='get_files_to_upload') as tp:
    # Fire one 'contains' query per batch, then flatten the per-batch results.
    for batch in batch_files_for_check(infiles):
      tp.add_task(0, check_files_exist_on_server, contains_hash_url, batch)
    for batch_result in tp.iter_results():
      for missing_file in batch_result:
        yield missing_file
maruel@chromium.org35fc0c82013-01-17 15:14:14 +0000564
565
maruel@chromium.org7b844a62013-09-17 13:04:59 +0000566def upload_tree(base_url, indir, infiles, namespace):
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000567 """Uploads the given tree to the given url.
568
569 Arguments:
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000570 base_url: The base url, it is assume that |base_url|/has/ can be used to
571 query if an element was already uploaded, and |base_url|/store/
572 can be used to upload a new element.
573 indir: Root directory the infiles are based in.
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +0000574 infiles: dict of files to upload files from |indir| to |base_url|.
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000575 namespace: The namespace to use on the server.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000576 """
577 logging.info('upload tree(base_url=%s, indir=%s, files=%d)' %
578 (base_url, indir, len(infiles)))
maruel@chromium.org034e3962013-03-13 13:34:25 +0000579
csharp@chromium.org07fa7592013-01-11 18:19:30 +0000580 # Create a pool of workers to zip and upload any files missing from
581 # the server.
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +0000582 num_threads = threading_utils.num_processors()
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000583 remote = get_storage_api(base_url, namespace)
584 # TODO(maruel): There's three separate thread pools here, it is not very
585 # efficient. remote_uploader and get_files_to_upload() should share the same
586 # pool and control priorities accordingly.
587 uploaded = []
588 with WorkerPool() as remote_uploader:
maruel@chromium.org781ccf62013-09-17 19:39:47 +0000589 # Starts the zip and upload process for files that are missing
590 # from the server.
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000591 # TODO(maruel): Move .contains() to the API.
maruel@chromium.org781ccf62013-09-17 19:39:47 +0000592 contains_hash_url = '%scontains/%s?token=%s' % (
593 remote.content_url, namespace, remote.token)
csharp@chromium.org07fa7592013-01-11 18:19:30 +0000594
maruel@chromium.org8750e4b2013-09-18 02:37:57 +0000595 def add_item(priority, item, content_generator):
596 remote_uploader.add_task(
597 priority, remote.push, item, UNKNOWN_FILE_SIZE, content_generator)
598
599 with threading_utils.ThreadPool(
600 min(2, num_threads), num_threads, 0, 'zip') as zipping_pool:
601 for relfile, metadata in get_files_to_upload(contains_hash_url, infiles):
602 infile = os.path.join(indir, relfile)
603 zipping_pool.add_task(
604 0, zip_and_trigger_upload, infile, metadata, add_item)
605 uploaded.append((relfile, metadata))
606
607 logging.info('Waiting for all files to finish zipping')
608 zipping_pool.join()
maruel@chromium.org781ccf62013-09-17 19:39:47 +0000609 logging.info('All files zipped.')
maruel@chromium.org781ccf62013-09-17 19:39:47 +0000610 remote_uploader.join()
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000611 logging.info('All files are uploaded')
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000612
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000613 total = len(infiles)
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000614 total_size = sum(metadata.get('s', 0) for metadata in infiles.itervalues())
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000615 logging.info(
616 'Total: %6d, %9.1fkb',
617 total,
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000618 sum(m.get('s', 0) for m in infiles.itervalues()) / 1024.)
csharp@chromium.org20a888c2013-01-15 15:06:55 +0000619 cache_hit = set(infiles.iterkeys()) - set(x[0] for x in uploaded)
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000620 cache_hit_size = sum(infiles[i].get('s', 0) for i in cache_hit)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000621 logging.info(
622 'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
623 len(cache_hit),
624 cache_hit_size / 1024.,
625 len(cache_hit) * 100. / total,
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000626 cache_hit_size * 100. / total_size if total_size else 0)
csharp@chromium.org20a888c2013-01-15 15:06:55 +0000627 cache_miss = uploaded
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000628 cache_miss_size = sum(infiles[i[0]].get('s', 0) for i in cache_miss)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000629 logging.info(
630 'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
631 len(cache_miss),
632 cache_miss_size / 1024.,
633 len(cache_miss) * 100. / total,
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000634 cache_miss_size * 100. / total_size if total_size else 0)
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +0000635 return 0
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000636
637
class MemoryCache(object):
  """Drop-in substitute for the Cache class that keeps content in RAM.

  Nothing is written to disk by this class itself; fetched file contents are
  held in memory so the caller can materialize them into the target directory
  directly.
  """

  def __init__(self, target_directory, pool, remote):
    # Directory the caller will eventually write files into; used by path().
    self.target_directory = target_directory
    # Thread pool running the asynchronous fetches.
    self.pool = pool
    # Storage backend exposing fetch(item, size).
    self.remote = remote
    # Guards _contents, which is mutated from pool worker threads.
    self._lock = threading.Lock()
    # item -> content string, filled in by _store().
    self._contents = {}

  def retrieve(self, priority, item, size):
    """Schedules an asynchronous fetch of |item|."""
    self.pool.add_task(priority, self._store, item, size)

  def wait_for(self, items):
    """Blocks until at least one of |items| has been retrieved.

    Returns the first item found.
    """
    with self._lock:
      # Fast path: something already arrived.
      already_present = [candidate for candidate in items
                         if candidate in self._contents]
    if already_present:
      return already_present[0]

    while True:
      finished = self.pool.get_one_result()
      if finished in items:
        return finished

  def path(self, item):
    """Returns where |item| would live under the target directory."""
    return os.path.join(self.target_directory, item)

  def read(self, item):
    """Returns the in-memory content of an already retrieved |item|."""
    return self._contents[item]

  def _store(self, item, size):
    """Worker task: fetches |item| content and records it. Returns |item|."""
    blob = ''.join(self.remote.fetch(item, size))
    with self._lock:
      self._contents[item] = blob
    return item

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    # Nothing to release; exceptions are never swallowed.
    return False
689
690
def load_isolated(content, os_flavor, algo):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  Raises ConfigError on the first malformed key/value found; validation order
  follows the dict iteration order of the parsed data.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  def _check_command(value):
    # A command is a non-empty list of strings.
    if not isinstance(value, list):
      raise ConfigError('Expected list, got %r' % value)
    if not value:
      raise ConfigError('Expected non-empty command')
    for arg in value:
      if not isinstance(arg, basestring):
        raise ConfigError('Expected string, got %r' % arg)

  def _check_file_properties(props):
    # Per-file properties: 'l' link, 'm' mode, 'h' hash, 's' size.
    if not isinstance(props, dict):
      raise ConfigError('Expected dict, got %r' % props)
    for prop, prop_value in props.iteritems():
      if prop == 'l':
        if not isinstance(prop_value, basestring):
          raise ConfigError('Expected string, got %r' % prop_value)
      elif prop == 'm':
        if not isinstance(prop_value, int):
          raise ConfigError('Expected int, got %r' % prop_value)
      elif prop == 'h':
        if not is_valid_hash(prop_value, algo):
          raise ConfigError('Expected sha-1, got %r' % prop_value)
      elif prop == 's':
        if not isinstance(prop_value, int):
          raise ConfigError('Expected int, got %r' % prop_value)
      else:
        raise ConfigError('Unknown subsubkey %s' % prop)
    # A file is either a symlink or content-addressed, never both.
    if bool('h' in props) and bool('l' in props):
      raise ConfigError(
          'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
          props)

  def _check_files(value):
    # Maps relative file path -> properties dict.
    if not isinstance(value, dict):
      raise ConfigError('Expected dict, got %r' % value)
    for filepath, props in value.iteritems():
      if not isinstance(filepath, basestring):
        raise ConfigError('Expected string, got %r' % filepath)
      _check_file_properties(props)

  def _check_includes(value):
    # A non-empty list of .isolated file hashes.
    if not isinstance(value, list):
      raise ConfigError('Expected list, got %r' % value)
    if not value:
      raise ConfigError('Expected non-empty includes list')
    for include in value:
      if not is_valid_hash(include, algo):
        raise ConfigError('Expected sha-1, got %r' % include)

  def _check_os(value):
    # Only enforced when the caller pins an OS flavor.
    if os_flavor and value != os_flavor:
      raise ConfigError(
          'Expected \'os\' to be \'%s\' but got \'%s\'' %
          (os_flavor, value))

  for key, value in data.iteritems():
    if key == 'command':
      _check_command(value)
    elif key == 'files':
      _check_files(value)
    elif key == 'includes':
      _check_includes(value)
    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)
    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)
    elif key == 'os':
      _check_os(value)
    else:
      raise ConfigError('Unknown key %s' % key)

  return data
768
769
class IsolatedFile(object):
  """A single parsed .isolated file and its position in the include tree."""

  def __init__(self, obj_hash, algo):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    self.algo = algo
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing
    # a .isolated file in the hash table, is important, as the later ones are
    # not processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Decoded json content of this .isolated file.
    self.data = {}
    # One IsolatedFile instance per entry in self.data['includes'].
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| as json and instantiates children for its includes.

    Must be called at most once.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content, None, self.algo)
    includes = self.data.get('includes', [])
    self.children = [IsolatedFile(digest, self.algo) for digest in includes]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds this file's entries not already in |files| and requests them.

    |files| is mutated in place. Entries already present win, so files
    overridden by an embedding .isolated are never fetched.
    """
    assert self.can_fetch
    if not self._is_parsed or self.files_fetched:
      # Either not loaded yet or already processed; nothing to do.
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      if filepath in files:
        # Root isolated has priority on the files being mapped; skip
        # overridden entries.
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request the content.
        logging.debug('fetching %s' % filepath)
        cache.retrieve(
            WorkerPool.MED,
            properties['h'],
            properties['s'])
    self.files_fetched = True
830
831
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command line to execute, taken from the first node defining one.
    self.command = []
    # Union of every node's 'files' map; earlier (root-side) entries win.
    self.files = {}
    # Tri-state: None until a node provides a value, then bool.
    self.read_only = None
    # Working directory relative to the run root; '' after load() if unset.
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash, algo):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.
    """
    self.root = IsolatedFile(root_isolated_hash, algo)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      # Schedules the fetch of one .isolated file, refusing include cycles.
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      cache.retrieve(WorkerPool.HIGH, h, UNKNOWN_FILE_SIZE)

    retrieve(self.root)

    while pending:
      # Process whichever pending .isolated file arrives first.
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      item.load(cache.read(item_hash))
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(cache, self.root)

    def check(n):
      # Sanity check: every node in the tree had its files requested.
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False

  def _traverse_tree(self, cache, node):
    # Depth-first walk that fetches files of already-fetchable nodes and
    # unlocks at most one not-yet-fetchable child per visit, so left-side
    # includes are always processed before the ones to their right.
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(cache, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(cache, i)

  def _update_self(self, cache, node):
    # Merges |node| into the aggregated settings. Traversal is root-first, so
    # the first node providing command/read_only/relative_cwd wins.
    node.fetch_files(cache, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      self.command = node.data['command']
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
923
924
maruel@chromium.orgfb78d432013-08-28 21:22:40 +0000925@subcommand.usage('<file1..fileN> or - to read from stdin')
926def CMDarchive(parser, args):
927 """Archives data to the server."""
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +0000928 options, files = parser.parse_args(args)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000929
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000930 if files == ['-']:
931 files = sys.stdin.readlines()
932
933 if not files:
934 parser.error('Nothing to upload')
maruel@chromium.orgfb78d432013-08-28 21:22:40 +0000935 if not options.isolate_server:
936 parser.error('Nowhere to send. Please specify --isolate-server')
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000937
938 # Load the necessary metadata. This is going to be rewritten eventually to be
939 # more efficient.
maruel@chromium.org7b844a62013-09-17 13:04:59 +0000940 algo = hashlib.sha1
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000941 infiles = dict(
942 (
943 f,
944 {
maruel@chromium.orge5c17132012-11-21 18:18:46 +0000945 's': os.stat(f).st_size,
maruel@chromium.org7b844a62013-09-17 13:04:59 +0000946 'h': hash_file(f, algo),
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000947 }
948 )
949 for f in files)
950
vadimsh@chromium.orga4326472013-08-24 02:05:41 +0000951 with tools.Profiler('Archive'):
maruel@chromium.org7b844a62013-09-17 13:04:59 +0000952 ret = upload_tree(
maruel@chromium.orgfb78d432013-08-28 21:22:40 +0000953 base_url=options.isolate_server,
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000954 indir=os.getcwd(),
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000955 infiles=infiles,
956 namespace=options.namespace)
maruel@chromium.orgb7e79a22013-09-13 01:24:56 +0000957 if not ret:
958 print '\n'.join('%s %s' % (infiles[f]['h'], f) for f in sorted(infiles))
959 return ret
maruel@chromium.orgfb78d432013-08-28 21:22:40 +0000960
961
def CMDdownload(parser, args):
  """Download data from the server.

  It can download individual files.
  """
  parser.add_option(
      '-f', '--file', metavar='HASH DEST', default=[], action='append', nargs=2,
      help='hash and destination of a file, can be used multiple times')
  parser.add_option(
      '-t', '--target', metavar='DIR', default=os.getcwd(),
      help='destination directory')
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported arguments: %s' % args)
  if not options.file:
    # Bug fix: the previous message ('Use one of --file is required.') was
    # garbled; state the requirement plainly.
    parser.error('At least one --file is required.')

  options.target = os.path.abspath(options.target)
  remote = get_storage_api(options.isolate_server, options.namespace)
  for h, dest in options.file:
    logging.info('%s: %s', h, dest)
    # Stream the fetched content straight to its destination path.
    file_write(
        os.path.join(options.target, dest),
        remote.fetch(h, UNKNOWN_FILE_SIZE))
  return 0
987
988
class OptionParserIsolateServer(tools.OptionParserWithLogging):
  """Option parser with logging support plus the isolate server flags."""

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(self, **kwargs)
    self.add_option(
        '-I', '--isolate-server',
        metavar='URL',
        default=ISOLATE_SERVER,
        help='Isolate server where data is stored. default: %default')
    self.add_option(
        '--namespace', default='default-gzip',
        help='The namespace to use on the server, default: %default')

  def parse_args(self, *args, **kwargs):
    """Parses arguments, then normalizes and validates --isolate-server."""
    parsed, leftover = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    # Drop any trailing slash so URLs can be concatenated predictably.
    parsed.isolate_server = parsed.isolate_server.rstrip('/')
    if not parsed.isolate_server:
      self.error('--isolate-server is required.')
    return parsed, leftover
1008
1009
def main(args):
  """Runs the subcommand named in |args| and returns the process exit code."""
  dispatcher = subcommand.CommandDispatcher(__name__)
  try:
    return dispatcher.execute(
        OptionParserIsolateServer(version=__version__), args)
  except (ConfigError, MappingError) as e:
    # Expected failure modes: report the message instead of a traceback.
    sys.stderr.write('\nError: %s\n' % str(e))
    return 1
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001020
1021
if __name__ == '__main__':
  # Normalize stream encoding and disable buffering before any output is
  # produced, enable colored terminal output, then dispatch the subcommand.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))