blob: 1507a225a3a5a5d84d5433fad84673fd63f33730 [file] [log] [blame]
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00001#!/usr/bin/env python
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00002# Copyright 2013 The Chromium Authors. All rights reserved.
maruel@chromium.orgc6f90062012-11-07 18:32:22 +00003# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Archives a set of files to a server."""
7
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00008__version__ = '0.1.1'
maruel@chromium.orgfb78d432013-08-28 21:22:40 +00009
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000010import binascii
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +000011import cStringIO
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000012import hashlib
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +000013import itertools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000014import logging
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000015import os
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000016import Queue
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000017import sys
18import time
maruel@chromium.orge82112e2013-04-24 14:41:55 +000019import urllib
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +000020import zlib
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000021
maruel@chromium.orgfb78d432013-08-28 21:22:40 +000022from third_party import colorama
23from third_party.depot_tools import fix_encoding
24from third_party.depot_tools import subcommand
25
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000026from utils import net
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000027from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000028from utils import tools
maruel@chromium.orgc6f90062012-11-07 18:32:22 +000029
30
# Default server.
# TODO(maruel): Chromium-specific.
ISOLATE_SERVER = 'https://isolateserver-dev.appspot.com/'


# The minimum size of files to upload directly to the blobstore.
MIN_SIZE_FOR_DIRECT_BLOBSTORE = 20 * 1024

# The number of files to check the isolate server per /contains query.
# All files are sorted by likelihood of a change in the file content
# (currently file size is used to estimate this: larger the file -> larger the
# possibility it has changed). Then the first ITEMS_PER_CONTAINS_QUERIES[0]
# files are taken and sent to '/contains', then the next
# ITEMS_PER_CONTAINS_QUERIES[1], and so on. Numbers here are a trade-off; the
# more per request, the lower the effect of HTTP round trip latency and
# TCP-level chattiness. On the other hand, larger values cause longer lookups,
# increasing the initial latency to start uploading, which is especially an
# issue for large files. This value is optimized for the "few thousands files
# to look up with minimal number of large files missing" case.
ITEMS_PER_CONTAINS_QUERIES = [20, 20, 50, 50, 50, 100]

# A list of already compressed extension types that should not receive any
# compression before being uploaded.
ALREADY_COMPRESSED_TYPES = [
    '7z', 'avi', 'cur', 'gif', 'h264', 'jar', 'jpeg', 'jpg', 'pdf', 'png',
    'wav', 'zip'
]


# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None


# The size of each chunk to read when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024
67
68
class ConfigError(ValueError):
  """Raised when a .isolated file fails to load."""
72
73
class MappingError(OSError):
  """Raised when the file tree cannot be recreated."""
77
78
def randomness():
  """Returns a weakly random string used to salt MIME boundaries.

  Kept as a standalone function so unit tests can stub it out.
  """
  return '%s' % time.time()
85
86
def encode_multipart_formdata(fields, files,
                              mime_mapper=lambda _: 'application/octet-stream'):
  """Encodes a Multipart form data object.

  Args:
    fields: a sequence of (name, value) elements for regular form fields.
    files: a sequence of (name, filename, value) elements for data to be
           uploaded as files.
    mime_mapper: function to return the mime type from the filename.
  Returns:
    content_type: for httplib.HTTP instance.
    body: for httplib.HTTP instance.
  """
  # The boundary only needs to be unlikely to appear in the payload; low
  # entropy is fine here, it is not a security boundary.
  boundary = hashlib.md5(randomness()).hexdigest()
  body_list = []
  for (key, value) in fields:
    # Bug fix: the original encoded the key but assigned the result to
    # |value|, clobbering the field value and leaving |key| as unicode.
    if isinstance(key, unicode):
      key = key.encode('utf-8')
    if isinstance(value, unicode):
      value = value.encode('utf-8')
    body_list.append('--' + boundary)
    body_list.append('Content-Disposition: form-data; name="%s"' % key)
    body_list.append('')
    body_list.append(value)
    body_list.append('--' + boundary)
    body_list.append('')
  for (key, filename, value) in files:
    # Same fix as above: encode each of |key|, |filename| and |value| in
    # place instead of overwriting |value| with the others' encodings.
    if isinstance(key, unicode):
      key = key.encode('utf-8')
    if isinstance(filename, unicode):
      filename = filename.encode('utf-8')
    if isinstance(value, unicode):
      value = value.encode('utf-8')
    body_list.append('--' + boundary)
    body_list.append('Content-Disposition: form-data; name="%s"; '
                     'filename="%s"' % (key, filename))
    body_list.append('Content-Type: %s' % mime_mapper(filename))
    body_list.append('')
    body_list.append(value)
    body_list.append('--' + boundary)
    body_list.append('')
  if body_list:
    # Terminate the multipart message: the last boundary gets a '--' suffix.
    body_list[-2] += '--'
  body = '\r\n'.join(body_list)
  content_type = 'multipart/form-data; boundary=%s' % boundary
  return content_type, body
134
135
def sha1_file(filepath):
  """Returns the hex SHA-1 digest of a file, hashing it in bounded chunks."""
  hasher = hashlib.sha1()
  chunk_size = 1024 * 1024  # 1 MiB keeps memory usage flat on large files.
  with open(filepath, 'rb') as stream:
    while True:
      block = stream.read(chunk_size)
      if not block:
        break
      hasher.update(block)
  return hasher.hexdigest()
147
148
def valid_file(filepath, size):
  """Checks that the file at |filepath| appears valid.

  Only the on-disk size is verified for now; UNKNOWN_FILE_SIZE skips the
  check entirely.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  actual_size = os.stat(filepath).st_size
  if actual_size == size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), actual_size, size)
  return False
163
164
def url_read(url, **kwargs):
  """Reads |url|, raising MappingError when the server cannot be reached."""
  result = net.url_read(url, **kwargs)
  if result is not None:
    return result
  # No response at all: assume the server is down and fail loudly instead of
  # handing callers a None.
  raise MappingError('Unable to connect to server %s' % url)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000172
173
def upload_hash_content_to_blobstore(
    generate_upload_url, data, hash_key, content):
  """Uploads the given hash contents directly to the blobstore via a generated
  url.

  Arguments:
    generate_upload_url: The url to get the new upload url from.
    data: extra POST data.
    hash_key: sha1 of the uncompressed version of content.
    content: The contents to upload. Must fit in memory for now.

  Returns:
    The server's response body on success.

  Raises:
    MappingError: if no upload url could be generated or the upload itself
        never got a response within the retry budget.
  """
  logging.debug('Generating url to directly upload file to blobstore')
  assert isinstance(hash_key, str), hash_key
  assert isinstance(content, str), (hash_key, content)
  # TODO(maruel): Support large files. This would require streaming support.
  content_type, body = encode_multipart_formdata(
      data, [('content', hash_key, content)])
  for _ in net.retry_loop(max_attempts=net.URL_OPEN_MAX_ATTEMPTS):
    # Retry HTTP 50x here.
    upload_url = net.url_read(generate_upload_url, data=data)
    if not upload_url:
      raise MappingError(
          'Unable to connect to server %s' % generate_upload_url)

    # Do not retry this request on HTTP 50x. Regenerate an upload url each time
    # since uploading "consumes" the upload url.
    result = net.url_read(
        upload_url, data=body, content_type=content_type, retry_50x=False)
    if result is not None:
      return result
  # The retry loop was exhausted without ever getting a response back.
  raise MappingError(
      'Unable to connect to server %s' % generate_upload_url)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000206
207
class IsolateServer(object):
  """Thin wrapper around an isolate server's /content/ endpoints."""

  def __init__(self, base_url, namespace):
    assert base_url.startswith('http'), base_url
    self.content_url = base_url.rstrip('/') + '/content/'
    self.namespace = namespace
    # TODO(maruel): Make this request much earlier asynchronously while the
    # files are being enumerated.
    self.token = urllib.quote(url_read(self.content_url + 'get_token'))

  def store(self, content, hash_key):
    """Pushes |content| to the server, addressed by |hash_key|."""
    # TODO(maruel): Detect failures.
    hash_key = str(hash_key)
    if len(content) <= MIN_SIZE_FOR_DIRECT_BLOBSTORE:
      # Small payloads go through the plain /store/ handler.
      url = '%sstore/%s/%s?token=%s' % (
          self.content_url, self.namespace, hash_key, self.token)
      return url_read(
          url, data=content, content_type='application/octet-stream')
    url = '%sgenerate_blobstore_url/%s/%s' % (
        self.content_url, self.namespace, hash_key)
    # token is guaranteed to be already quoted but it is unnecessary here, and
    # only here.
    data = [('token', urllib.unquote(self.token))]
    return upload_hash_content_to_blobstore(url, data, hash_key, content)
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000232
233
def check_files_exist_on_server(query_url, queries):
  """Queries the server to see which files from this batch already exist there.

  Arguments:
    queries: The hash files to potential upload to the server.
  Returns:
    missing_files: list of files that are missing on the server.
  """
  # TODO(maruel): Move inside IsolateServer.
  logging.info('Checking existence of %d files...', len(queries))
  # The request body is the raw concatenation of the 20-byte binary digests.
  body = ''.join(
      binascii.unhexlify(metadata['h']) for (_, metadata) in queries)
  assert (len(body) % 20) == 0, repr(body)

  response = url_read(
      query_url, data=body, content_type='application/octet-stream')
  if len(queries) != len(response):
    raise MappingError(
        'Got an incorrect number of responses from the server. Expected %d, '
        'but got %d' % (len(queries), len(response)))

  # The server answers one byte per query; chr(0) marks a missing file.
  missing_files = [
      query for query, flag in zip(queries, response) if flag == chr(0)]
  logging.info('Queried %d files, %d cache hit',
               len(queries), len(queries) - len(missing_files))
  return missing_files
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000261
262
class RemoteOperation(object):
  """Priority based worker queue to operate on action items.

  It executes a function with the given task items. It is specialized to
  download files.

  When the priority of items is equal, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. Only the bits above INTERNAL_PRIORITY_BITS are usable by
  # callers; the low 8 bits are reserved internally to count retries.
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, do_item):
    # Function to fetch a remote object or upload to a remote location.
    self._do_item = do_item
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = threading_utils.ThreadPool(
        self.INITIAL_WORKERS, self.MAX_WORKERS, 0, 'remote')

  def join(self):
    """Blocks until the queue is empty."""
    return self._pool.join()

  def close(self):
    """Terminates all worker threads."""
    self._pool.close()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first.

    Thread-safe.
    """
    # Callers must not use the low bits; they are reserved for retry counting.
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Internal version of add_item() that permits retry-adjusted priorities."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    # Blocks until one task result is available; propagates its exception.
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions."""
    try:
      self._do_item(obj, dest)
      # A size mismatch means a corrupted transfer; treat it like an IOError
      # so the retry logic below kicks in.
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Remove unfinished download.
      if os.path.exists(dest):
        os.remove(dest)
      # Retry a few times, lowering the priority. Incrementing by one only
      # touches the reserved low bits, so caller-visible priority is unchanged.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise
337
338
def compression_level(filename):
  """Given a filename calculates the ideal compression level to use."""
  # Bug fix: os.path.splitext() keeps the leading dot ('.jpg') while
  # ALREADY_COMPRESSED_TYPES lists bare extensions ('jpg'), so the membership
  # test below could never match and already-compressed files were being
  # recompressed at level 7. Strip the dot before comparing.
  file_ext = os.path.splitext(filename)[1].lstrip('.').lower()
  # TODO(csharp): Profile to find what compression level works best.
  return 0 if file_ext in ALREADY_COMPRESSED_TYPES else 7
344
345
def read_and_compress(filepath, level):
  """Reads a file and returns its content gzip compressed."""
  compressor = zlib.compressobj(level)
  # Accumulate compressed pieces and join once at the end.
  pieces = []
  with open(filepath, 'rb') as stream:
    while True:
      chunk = stream.read(ZIPPED_FILE_CHUNK)
      if not chunk:
        break
      pieces.append(compressor.compress(chunk))
  pieces.append(compressor.flush(zlib.Z_FINISH))
  return ''.join(pieces)
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000360
maruel@chromium.orgcb3c3d52013-03-14 18:55:30 +0000361
def zip_and_trigger_upload(infile, metadata, upload_function):
  """Compresses |infile| and schedules its upload; returns the task handle."""
  # TODO(csharp): Fix crbug.com/150823 and enable the touched logic again.
  # if not metadata['T']:
  payload = read_and_compress(infile, compression_level(infile))
  if metadata.get('priority', '1') == '0':
    priority = RemoteOperation.HIGH
  else:
    priority = RemoteOperation.MED
  return upload_function(priority, payload, metadata['h'], None)
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000370
371
def batch_files_for_check(infiles):
  """Splits list of files to check for existence on the server into batches.

  Each batch corresponds to a single 'exists?' query to the server.

  Yields:
    batches: list of batches, each batch is a list of files.
  """
  # Only files with a known size participate; larger files are more likely to
  # have changed, so they are queried first.
  sized = ((name, meta) for name, meta in infiles.iteritems() if 's' in meta)
  batch_index = 0
  limit = ITEMS_PER_CONTAINS_QUERIES[0]
  batch = []
  for relfile, metadata in sorted(sized, key=lambda item: -item[1]['s']):
    batch.append((relfile, metadata))
    if len(batch) == limit:
      yield batch
      batch = []
      # Later batches grow according to the schedule in
      # ITEMS_PER_CONTAINS_QUERIES, clamped at its last entry.
      batch_index += 1
      limit = ITEMS_PER_CONTAINS_QUERIES[
          min(batch_index, len(ITEMS_PER_CONTAINS_QUERIES) - 1)]
  if batch:
    yield batch
394
395
def get_files_to_upload(contains_hash_url, infiles):
  """Yields files that are missing on the server."""
  # Fan the '/contains' queries out over a thread pool, then flatten the
  # per-batch results back into one stream of missing files.
  with threading_utils.ThreadPool(1, 16, 0, prefix='get_files_to_upload') as tp:
    for batch in batch_files_for_check(infiles):
      tp.add_task(0, check_files_exist_on_server, contains_hash_url, batch)
    for missing in itertools.chain.from_iterable(tp.iter_results()):
      yield missing
maruel@chromium.org35fc0c82013-01-17 15:14:14 +0000403
404
def _log_upload_stats(infiles, uploaded):
  """Logs cache hit/miss statistics for one upload batch.

  Arguments:
    infiles: dict of all candidate files with their metadata.
    uploaded: list of (relfile, metadata) pairs that were actually uploaded.
  """
  total = len(infiles)
  total_size = sum(metadata.get('s', 0) for metadata in infiles.itervalues())
  logging.info(
      'Total: %6d, %9.1fkb',
      total,
      # Reuse total_size instead of recomputing the sum a second time.
      total_size / 1024.)
  cache_hit = set(infiles.iterkeys()) - set(x[0] for x in uploaded)
  cache_hit_size = sum(infiles[i].get('s', 0) for i in cache_hit)
  logging.info(
      'cache hit: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
      len(cache_hit),
      cache_hit_size / 1024.,
      # Guard both denominators: an empty tree would otherwise raise
      # ZeroDivisionError (only total_size was guarded before).
      len(cache_hit) * 100. / total if total else 0,
      cache_hit_size * 100. / total_size if total_size else 0)
  cache_miss = uploaded
  cache_miss_size = sum(infiles[i[0]].get('s', 0) for i in cache_miss)
  logging.info(
      'cache miss: %6d, %9.1fkb, %6.2f%% files, %6.2f%% size',
      len(cache_miss),
      cache_miss_size / 1024.,
      len(cache_miss) * 100. / total if total else 0,
      cache_miss_size * 100. / total_size if total_size else 0)


def upload_sha1_tree(base_url, indir, infiles, namespace):
  """Uploads the given tree to the given url.

  Arguments:
    base_url:  The base url, it is assume that |base_url|/has/ can be used to
               query if an element was already uploaded, and |base_url|/store/
               can be used to upload a new element.
    indir:     Root directory the infiles are based in.
    infiles:   dict of files to upload files from |indir| to |base_url|.
    namespace: The namespace to use on the server.

  Returns:
    0 on success (exceptions signal failure).
  """
  logging.info('upload tree(base_url=%s, indir=%s, files=%d)' %
               (base_url, indir, len(infiles)))

  # Create a pool of workers to zip and upload any files missing from
  # the server.
  num_threads = threading_utils.num_processors()
  zipping_pool = threading_utils.ThreadPool(min(2, num_threads),
                                            num_threads, 0, 'zip')
  remote = IsolateServer(base_url, namespace)
  remote_uploader = RemoteOperation(remote.store)

  # Starts the zip and upload process for files that are missing
  # from the server.
  contains_hash_url = '%scontains/%s?token=%s' % (
      remote.content_url, namespace, remote.token)
  uploaded = []
  for relfile, metadata in get_files_to_upload(contains_hash_url, infiles):
    infile = os.path.join(indir, relfile)
    zipping_pool.add_task(0, zip_and_trigger_upload, infile, metadata,
                          remote_uploader.add_item)
    uploaded.append((relfile, metadata))

  logging.info('Waiting for all files to finish zipping')
  zipping_pool.join()
  zipping_pool.close()
  logging.info('All files zipped.')

  logging.info('Waiting for all files to finish uploading')
  # Will raise if any exception occurred.
  remote_uploader.join()
  remote_uploader.close()
  logging.info('All files are uploaded')

  _log_upload_stats(infiles, uploaded)
  return 0
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000472
473
@subcommand.usage('<file1..fileN> or - to read from stdin')
def CMDarchive(parser, args):
  """Archives data to the server."""
  options, files = parser.parse_args(args)

  if files == ['-']:
    # Bug fix: readlines() keeps the trailing '\n' on each path, which made
    # the os.stat() calls below fail. Strip it and drop blank lines.
    files = [line.rstrip('\n') for line in sys.stdin.readlines()]
    files = [f for f in files if f]

  if not files:
    parser.error('Nothing to upload')
  if not options.isolate_server:
    parser.error('Nowhere to send. Please specify --isolate-server')

  # Load the necessary metadata. This is going to be rewritten eventually to be
  # more efficient.
  infiles = dict(
      (
        f,
        {
          's': os.stat(f).st_size,
          'h': sha1_file(f),
        }
      )
      for f in files)

  with tools.Profiler('Archive'):
    # upload_sha1_tree() already returns the exit code; the trailing
    # 'return 0' that used to follow this statement was unreachable.
    return upload_sha1_tree(
        base_url=options.isolate_server,
        indir=os.getcwd(),
        infiles=infiles,
        namespace=options.namespace)
506
507
def CMDdownload(parser, args):
  """Download data from the server."""
  _options, args = parser.parse_args(args)
  # Not implemented yet; bail out through the option parser.
  parser.error("Sorry, it's not really supported.")
  return 0
513
514
class OptionParserIsolateServer(tools.OptionParserWithLogging):
  """Option parser adding the --isolate-server and --namespace flags."""

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(self, **kwargs)
    self.add_option(
        '-I', '--isolate-server',
        default=ISOLATE_SERVER,
        metavar='URL',
        help='Isolate server where data is stored. default: %default')
    self.add_option(
        '--namespace', default='default-gzip',
        help='The namespace to use on the server.')

  def parse_args(self, *args, **kwargs):
    """Parses arguments and normalizes/validates --isolate-server."""
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    # Drop the trailing slash so urls can be built by simple concatenation.
    options.isolate_server = options.isolate_server.rstrip('/')
    if not options.isolate_server:
      self.error('--isolate-server is required.')
    return options, args
534
535
def main(args):
  """Dispatches to the requested subcommand; returns the process exit code."""
  dispatcher = subcommand.CommandDispatcher(__name__)
  try:
    return dispatcher.execute(
        OptionParserIsolateServer(version=__version__), args)
  except (ConfigError, MappingError) as e:
    # Known failure modes get a concise message instead of a traceback.
    sys.stderr.write('\nError: %s\n' % str(e))
    return 1
maruel@chromium.orgc6f90062012-11-07 18:32:22 +0000546
547
if __name__ == '__main__':
  # Normalize stdout/stderr encoding, disable buffering and enable ANSI
  # colors before dispatching to the subcommand machinery.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))