blob: 730dbe3fe57267a9f6f557d0c4655b9129164250 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
vadimsh@chromium.org80f73002013-07-12 14:52:44 +000013import functools
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000014import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000015import httplib
maruel@chromium.org2b2139a2013-04-30 20:14:58 +000016import itertools
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000017import json
csharp@chromium.orgbfb98742013-03-26 20:28:36 +000018import locale
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000019import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000020import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000021import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000022import optparse
23import os
24import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000025import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000026import re
27import shutil
vadimsh@chromium.org80f73002013-07-12 14:52:44 +000028import socket
29import ssl
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000030import stat
31import subprocess
32import sys
33import tempfile
34import threading
35import time
36import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000037import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000038import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000039import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000040
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000041from utils import zip_package
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000042from utils import threading_utils
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000043
# Optional dependency: the 'upload' module is used by AppEngineService for
# authentication. When it cannot be imported, |upload| is set to None and app
# engine authentication support is disabled.
try:
  from third_party import upload
except ImportError:
  upload = None
else:
  # Hack out upload logging.info(): give the module its own logger that only
  # lets warnings and above through.
  upload.logging = logging.getLogger('upload')
  # Mac pylint choke on this line.
  upload.logging.setLevel(logging.WARNING)  # pylint: disable=E1103
54
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000055
# Absolute path to this file (can be None if running from zip on Mac).
THIS_FILE_PATH = None
if __file__:
  THIS_FILE_PATH = os.path.abspath(__file__)

# Directory that contains this file (might be inside zip package). None
# whenever THIS_FILE_PATH is None.
BASE_DIR = os.path.dirname(THIS_FILE_PATH) if THIS_FILE_PATH else None

# Directory that contains currently running script file.
MAIN_DIR = os.path.dirname(
    os.path.abspath(zip_package.get_main_script_path()))
64
# Types of action accepted by link_file().
HARDLINK = 1
HARDLINK_WITH_FALLBACK = 2
SYMLINK = 3
COPY = 4

# Matches a string made of exactly 40 hexadecimal digits (a sha-1 digest).
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None

# The size of each chunk to read when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# The name of the log to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = 'run_test_cases.log'

# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60

# The name of the key to store the count of url attempts.
COUNT_KEY = 'UrlOpenAttempt'

# Default maximum number of attempts to open a url before aborting.
URL_OPEN_MAX_ATTEMPTS = 30
# Default timeout (in seconds) when retrying.
URL_OPEN_TIMEOUT = 6 * 60.0

# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout whole download will be aborted.
DOWNLOAD_READ_TIMEOUT = 60

# Global (for now) map: server URL (http://example.com) -> HttpService instance.
# Used by get_http_service to cache HttpService instances; guarded by
# _http_services_lock.
_http_services = {}
_http_services_lock = threading.Lock()
109
# Used by get_flavor(): maps a sys.platform value to a flavor name. Platforms
# that are not listed here are reported as 'linux' by get_flavor().
FLAVOR_MAPPING = dict((
    ('cygwin', 'win'),
    ('win32', 'win'),
    ('darwin', 'mac'),
    ('sunos5', 'solaris'),
    ('freebsd7', 'freebsd'),
    ('freebsd8', 'freebsd'),
))
119
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000120
class ConfigError(ValueError):
  """Generic failure to load a .isolated file."""
124
125
class MappingError(OSError):
  """Failed to recreate the tree."""
129
130
class TimeoutError(IOError):
  """Timeout while reading HTTP response.

  |inner_exc| is the optional underlying exception. It is kept on the instance
  as |inner_exc| and its string form becomes the message; when it is missing
  (or falsy) the message is 'Timeout'.
  """

  def __init__(self, inner_exc=None):
    message = str(inner_exc or 'Timeout')
    super(TimeoutError, self).__init__(message)
    self.inner_exc = inner_exc
137
138
def get_as_zip_package(executable=True):
  """Returns ZipPackage with this module and all its dependencies.

  The package is rooted at BASE_DIR and contains this file plus the
  'third_party' and 'utils' directories.

  If |executable| is True will store run_isolated.py as __main__.py so that
  zip package is directly executable by python.
  """
  # Building a zip package when running from another zip package is
  # unsupported and probably unneeded.
  this_module = sys.modules[__name__]
  assert not zip_package.is_zipped_module(this_module)
  assert THIS_FILE_PATH
  assert BASE_DIR
  archive_name = '__main__.py' if executable else None
  package = zip_package.ZipPackage(root=BASE_DIR)
  package.add_python_file(THIS_FILE_PATH, archive_name)
  for subdir in ('third_party', 'utils'):
    package.add_directory(os.path.join(BASE_DIR, subdir))
  return package
155
156
def get_flavor():
  """Returns the system default flavor. Copied from gyp/pylib/gyp/common.py.

  The flavor is looked up in FLAVOR_MAPPING by sys.platform; any platform that
  is not listed there is reported as 'linux'.
  """
  platform = sys.platform
  if platform in FLAVOR_MAPPING:
    return FLAVOR_MAPPING[platform]
  return 'linux'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000160
161
def fix_default_encoding():
  """Forces utf8 solidly on all platforms.

  By default python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Returns False when the default encoding already is utf-8 (nothing is
  touched), True after the default encoding and the locale were updated.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Regenerate setdefaultencoding.
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  # pylint: disable=E1101
  sys.setdefaultencoding('utf-8')

  # Try to move every locale category (the LC_* attributes of the locale
  # module) to the UTF-8 variant of the language it currently uses.
  category_names = [name for name in dir(locale) if name.startswith('LC_')]
  for name in category_names:
    category = getattr(locale, name)
    try:
      locale.setlocale(category, '')
    except locale.Error:
      continue
    try:
      lang = locale.getlocale(category)[0]
    except (TypeError, ValueError):
      continue
    if not lang:
      continue
    try:
      locale.setlocale(category, (lang, 'UTF-8'))
    except locale.Error:
      # Could not switch in-process; record the desired value in the
      # environment instead.
      os.environ[name] = lang + '.UTF-8'

  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    pass
  return True
200
201
class Unbuffered(object):
  """Disable buffering on a file object.

  Wraps |stream|: write() forwards the data and flushes the stream whenever
  the data contains a newline. Every other attribute is looked up on the
  wrapped stream.
  """

  def __init__(self, stream):
    self.stream = stream

  def write(self, data):
    target = self.stream
    target.write(data)
    has_newline = '\n' in data
    if has_newline:
      target.flush()

  def __getattr__(self, attr):
    # Only called for attributes not found on the wrapper itself.
    return getattr(self.stream, attr)
214
215
def disable_buffering():
  """Makes this process and child processes stdout unbuffered.

  Does nothing when the PYTHONUNBUFFERED environment variable is already set
  to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...
  sys.stdout = Unbuffered(sys.stdout)
  os.environ['PYTHONUNBUFFERED'] = 'x'
223
224
def os_link(source, link_name):
  """Add support for os.link() on Windows.

  Creates |link_name| as a hard link to |source|. On win32 this goes through
  kernel32's CreateHardLinkW and raises a bare OSError when that call reports
  failure; everywhere else it is plain os.link().
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  created = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not created:
    raise OSError()
233
234
def readable_copy(outfile, infile):
  """Makes a copy of the file that is readable by everyone.

  |infile| is copied to |outfile| with shutil.copy2(), then the read bits for
  user, group and others are added to the mode of |outfile|.
  """
  shutil.copy2(infile, outfile)
  read_for_all = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  current_mode = os.stat(outfile).st_mode
  os.chmod(outfile, current_mode | read_for_all)
241
242
def link_file(outfile, infile, action):
  """Links a file. The type of link depends on |action|.

  |action| is one of HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK or COPY.

  Raises ValueError for an unknown |action| and MappingError when |infile| is
  missing, when |outfile| already exists or when a strict HARDLINK fails.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  known_actions = (HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK, COPY)
  if action not in known_actions:
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, os.stat(infile).st_size, os.stat(outfile).st_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # Hardlink path. On windows, symlink are converted to hardlink and fails
  # over to copy.
  try:
    os_link(infile, outfile)
  except OSError as e:
    if action == HARDLINK:
      # Strict hardlink requested: no fallback allowed.
      raise MappingError(
          'Failed to hardlink %s to %s: %s' % (infile, outfile, e))
    # Probably a different file system.
    logging.warning(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
272
273
def _set_write_bit(path, read_only):
  """Sets or resets the write bit on a file or directory.

  If |read_only| is true, the permission bits of |path| are reduced to the
  owner's read and execute bits (mask 0500); otherwise the owner's write bit
  (0200) is added to the current mode.

  Symlinks are never followed: the mode is read with os.lstat() and applied
  with os.lchmod() where it exists. Where it does not, symlinks are skipped so
  that the link target is left untouched.
  """
  # Keep the unmodified mode around: it carries the file type bits needed by
  # the symlink check below, which the 0500 mask would otherwise strip.
  current_mode = os.lstat(path).st_mode
  if read_only:
    mode = current_mode & 0o500
  else:
    mode = current_mode | 0o200

  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  if stat.S_ISLNK(current_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s' % path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
291
292
def make_writable(root, read_only):
  """Toggle the writable bit on a directory tree.

  Walks |root| (which must be an absolute path) top-down and applies
  _set_write_bit() to every file and sub-directory found below it. |root|
  itself is not modified.
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    # Files first, then directories, within each visited directory.
    for entry in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, entry), read_only)
302
303
def rmtree(root):
  """Wrapper around shutil.rmtree() to retry automatically on Windows.

  The tree is made writable first. On win32 the removal is attempted up to
  three times, sleeping 2, 4 then 6 seconds after each failure.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  # NOTE(review): when all three attempts fail, the function returns without
  # raising and the tree is left in place.
  for attempt in range(3):
    try:
      shutil.rmtree(root)
    except WindowsError:  # pylint: disable=E0602
      delay = 2 * (attempt + 1)
      print >> sys.stderr, (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      time.sleep(delay)
    else:
      break
319
320
def is_same_filesystem(path1, path2):
  """Returns True if both paths are on the same filesystem.

  This is required to enable the use of hardlinks.

  Both paths must be absolute. The decision is based on the st_dev value
  reported by os.stat(); on win32 a drive letter mismatch short-circuits to
  False.
  """
  for path in (path1, path2):
    assert os.path.isabs(path), path
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    drive1 = path1[0].lower()
    drive2 = path2[0].lower()
    if drive1 != drive2:
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
337
338
def get_free_space(path):
  """Returns the number of free bytes.

  On win32 the value comes from kernel32's GetDiskFreeSpaceExW; on every other
  OS it is computed from os.statvfs() as f_bfree * f_frsize.
  """
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
349
350
def make_temp_dir(prefix, root_dir):
  """Returns a temporary directory on the same file system as root_dir.

  The system default temporary location is used when it is on the same file
  system as |root_dir|; otherwise the directory is created next to |root_dir|,
  inside its parent directory.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
357
358
def _check_isolated_sha1(value):
  """Raises ConfigError unless |value| is a string holding a sha-1 digest."""
  # The type check comes first: RE_IS_SHA1.match() raises TypeError on
  # anything that is not a string, which would escape as the wrong exception.
  if not isinstance(value, basestring) or not RE_IS_SHA1.match(value):
    raise ConfigError('Expected sha-1, got %r' % (value,))


def _check_isolated_file_entry(properties):
  """Validates the properties dict of a single entry of the 'files' section."""
  for name, prop in properties.iteritems():
    if name == 'l':
      if not isinstance(prop, basestring):
        raise ConfigError('Expected string, got %r' % prop)
    elif name in ('m', 's'):
      if not isinstance(prop, int):
        raise ConfigError('Expected int, got %r' % prop)
    elif name == 'h':
      _check_isolated_sha1(prop)
    else:
      raise ConfigError('Unknown subsubkey %s' % name)
  if 'h' in properties and 'l' in properties:
    raise ConfigError(
        'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
        properties)


def load_isolated(content, os_flavor=None):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  |content| is the json text of the .isolated file. |os_flavor| is the value
  the optional 'os' key must have; it defaults to get_flavor().

  Accepted top level keys:
  - 'command': non-empty list of strings.
  - 'files': dict mapping a string to a dict whose keys are limited to
    'l' (string), 'm' (int), 'h' (sha-1) and 's' (int); 'h' and 'l' are
    mutually exclusive.
  - 'includes': non-empty list of sha-1.
  - 'read_only': bool.
  - 'relative_cwd': string.
  - 'os': must match the expected flavor.

  Returns the decoded dict. Raises ConfigError when the content cannot be
  parsed, is not a dict, has an unknown key or has a value of the wrong type.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        _check_isolated_file_entry(subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        _check_isolated_sha1(subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      expected_value = os_flavor or get_flavor()
      if value != expected_value:
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (expected_value, value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
437
438
def fix_python_path(cmd):
  """Returns the fixed command line to call the right python executable.

  A leading 'python' is replaced by sys.executable, and sys.executable is
  prepended when the first item ends with '.py'. |cmd| itself is not modified;
  a copy is returned.
  """
  fixed = cmd[:]
  program = fixed[0]
  if program == 'python':
    fixed[0] = sys.executable
    return fixed
  if program.endswith('.py'):
    fixed.insert(0, sys.executable)
  return fixed
447
448
def url_open(url, **kwargs):
  """Attempts to open the given url multiple times.

  The url is split into its server part and its request part; the request is
  then sent through the HttpService instance returned by get_http_service()
  for that server. All keyword arguments are forwarded to its request()
  method.

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns HttpResponse object, where the response may be read from, or None
  if it was unable to connect.
  """
  server, request_path = split_server_request_url(url)
  return get_http_service(server).request(request_path, **kwargs)
464
465
def url_read(url, **kwargs):
  """Attempts to open the given url multiple times and read all data from it.

  Accepts same arguments as url_open function.

  Returns all data read or None if it was unable to connect or read the data.
  """
  response = url_open(url, **kwargs)
  if response:
    try:
      return response.read()
    except TimeoutError:
      # Reading timed out: reported to the caller as "no data".
      pass
  return None
480
481
def split_server_request_url(url):
  """Splits the url into scheme+netloc and path+params+query+fragment.

  Returns a (urlhost, urlpath) tuple, e.g. 'http://host/a?b=1' gives
  ('http://host', '/a?b=1').
  """
  parsed = urlparse.urlparse(url)
  urlhost = '%s://%s' % (parsed.scheme, parsed.netloc)
  # Rebuild the url with empty scheme and netloc to keep only what follows.
  urlpath = urlparse.urlunparse(('', '') + tuple(parsed[2:]))
  return urlhost, urlpath
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000488
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000489
def get_http_service(urlhost):
  """Returns existing or creates new instance of HttpService that can send
  requests to given base urlhost.

  Instances are cached in the module level _http_services map, keyed by the
  lower-cased |urlhost| stripped of trailing slashes. New instances are
  AppEngineService objects.
  """
  # Ensure consistency.
  key = str(urlhost).lower().rstrip('/')
  with _http_services_lock:
    cached = _http_services.get(key)
    if cached:
      return cached
    created = AppEngineService(key)
    _http_services[key] = created
    return created
502
503
504class HttpService(object):
505 """Base class for a class that provides an API to HTTP based service:
506 - Provides 'request' method.
507 - Supports automatic request retries.
508 - Supports persistent cookies.
509 - Thread safe.
510 """
511
512 # File to use to store all auth cookies.
maruel@chromium.orgbf2a02a2013-07-11 13:27:16 +0000513 COOKIE_FILE = os.path.join(os.path.expanduser('~'), '.isolated_cookies')
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000514
515 # CookieJar reused by all services + lock that protects its instantiation.
516 _cookie_jar = None
517 _cookie_jar_lock = threading.Lock()
518
def __init__(self, urlhost):
  """Binds the service to |urlhost| and sets up its cookie jar and opener."""
  self.urlhost = urlhost
  # The cookie jar has to exist before the opener is created:
  # create_url_opener() hooks self.cookie_jar into the opener it builds.
  jar = self.load_cookie_jar()
  self.cookie_jar = jar
  self.opener = self.create_url_opener()
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000523
def authenticate(self):  # pylint: disable=R0201
  """Called when HTTP server asks client to authenticate.

  Can be implemented in subclasses. This base implementation always returns
  False.
  """
  return False
529
@staticmethod
def load_cookie_jar():
  """Returns global CookieJar object that stores cookies in the file.

  The jar is a ThreadSafeCookieJar bound to HttpService.COOKIE_FILE. It is
  created and loaded on the first call, then the same object is returned by
  every later call.
  """
  with HttpService._cookie_jar_lock:
    if HttpService._cookie_jar is None:
      jar = ThreadSafeCookieJar(HttpService.COOKIE_FILE)
      jar.load()
      # Published only once load() went through.
      HttpService._cookie_jar = jar
    return HttpService._cookie_jar
540
@staticmethod
def save_cookie_jar():
  """Called when cookie jar needs to be flushed to disk.

  Does nothing when the global cookie jar was never created.
  """
  with HttpService._cookie_jar_lock:
    jar = HttpService._cookie_jar
    if jar is None:
      return
    jar.save()
547
def create_url_opener(self):  # pylint: disable=R0201
  """Returns OpenerDirector that will be used when sending requests.

  The opener is built with a cookie processor bound to self.cookie_jar.
  Can be reimplemented in subclasses.
  """
  cookie_handler = urllib2.HTTPCookieProcessor(self.cookie_jar)
  return urllib2.build_opener(cookie_handler)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000552
def request(self, urlpath, data=None, content_type=None, **kwargs):
  """Attempts to open the given url multiple times.

  |urlpath| is relative to the server root, i.e. '/some/request?param=1'.

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  |content_type| is only accepted along with |data| and is sent as the
  'Content-Type' header. Remaining keyword arguments are forwarded to
  _retry_loop().

  Returns a file-like object, where the response may be read from, or None
  if it was unable to connect.
  """
  assert urlpath and urlpath[0] == '/'

  if isinstance(data, dict) and COUNT_KEY in data:
    logging.error('%s already existed in the data passed into UlrOpen. It '
                  'would be overwritten. Aborting UrlOpen', COUNT_KEY)
    return None

  method = 'GET' if data is None else 'POST'
  assert method == 'POST' or not content_type, (
      'Can\'t use content_type on GET')

  # A str (pre-encoded) or missing payload is sent as is; the per-retry extra
  # parameters then travel in the query string instead of the body.
  send_as_is = isinstance(data, str) or data is None

  def make_request(extra):
    """Returns a urllib2.Request instance for this specific retry."""
    new_url = urlparse.urljoin(self.urlhost, urlpath[1:])
    if send_as_is:
      payload = data
      url_parts = list(urlparse.urlparse(new_url))
      # Append the query parameter.
      if url_parts[4] and extra:
        url_parts[4] += '&'
      url_parts[4] += urllib.urlencode(extra)
      new_url = urlparse.urlunparse(url_parts)
    else:
      # Work on a copy so that |data| is left untouched across retries.
      pairs = data.items() if isinstance(data, dict) else data[:]
      pairs.extend(extra.iteritems())
      payload = urllib.urlencode(pairs)
    http_request = urllib2.Request(new_url, data=payload)
    if payload is not None:
      if content_type:
        http_request.add_header('Content-Type', content_type)
      http_request.add_header('Content-Length', len(payload))
    return http_request

  return self._retry_loop(make_request, **kwargs)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000606
def _retry_loop(
    self,
    make_request,
    max_attempts=URL_OPEN_MAX_ATTEMPTS,
    retry_404=False,
    retry_50x=True,
    timeout=URL_OPEN_TIMEOUT,
    read_timeout=None):
  """Runs internal request-retry loop.

  |make_request| is called before each attempt with a dict of extra
  parameters (empty on the first attempt, {COUNT_KEY: attempt} afterwards) and
  must return the urllib2.Request to send.

  - Optionally retries HTTP 404 and 50x.
  - Retries up to |max_attempts| times. If None or 0, there's no limit in the
    number of retries.
  - Retries up to |timeout| duration in seconds. If None or 0, there's no
    limit in the time taken to do retries.
  - If both |max_attempts| and |timeout| are None or 0, this functions retries
    indefinitely.
  - On HTTP 401 or 403, self.authenticate() is called once; when it succeeds
    the request is retried right away, otherwise None is returned.
  - Sleeps through self.sleep_before_retry() between two attempts, but not
    after the last allowed one.

  If |read_timeout| is not None will configure underlying socket to
  raise TimeoutError exception whenever there's no response from the server
  for more than |read_timeout| seconds. It can happen during any read
  operation so once you pass non-None |read_timeout| be prepared to handle
  these exceptions in subsequent reads from the stream.

  Returns a HttpResponse on success, None when the request was rejected or
  when attempts or time ran out.
  """
  authenticated = False
  last_error = None
  request = None
  attempts_made = 0
  start = self._now()
  for attempt in itertools.count():
    if max_attempts and attempt >= max_attempts:
      # Too many attempts.
      break
    if timeout and (self._now() - start) >= timeout:
      # Retried for too long.
      break
    extra = {COUNT_KEY: attempt} if attempt else {}
    request = make_request(extra)
    attempts_made = attempt + 1
    try:
      url_response = self._url_open(request, timeout=read_timeout)
      logging.debug('url_open(%s) succeeded', request.get_full_url())
      return HttpResponse(url_response, request.get_full_url())
    except urllib2.HTTPError as e:
      # Unauthorized. Ask to authenticate and then try again.
      if e.code in (401, 403):
        # Try to authenticate only once. If it doesn't help, then server does
        # not support app engine authentication.
        logging.error(
            'Authentication is required for %s on attempt %d.\n%s',
            request.get_full_url(), attempt,
            self._format_exception(e, verbose=True))
        if not authenticated and self.authenticate():
          authenticated = True
          # Remember the error in case the attempt budget is already spent.
          last_error = e
          # Do not sleep.
          continue
        # If authentication failed, return.
        logging.error(
            'Unable to authenticate to %s.\n%s',
            request.get_full_url(), self._format_exception(e, verbose=True))
        return None

      if ((e.code < 500 and not (retry_404 and e.code == 404)) or
          (e.code >= 500 and not retry_50x)):
        # This HTTPError means we reached the server and there was a problem
        # with the request, so don't retry.
        logging.error(
            'Able to connect to %s but an exception was thrown.\n%s',
            request.get_full_url(), self._format_exception(e, verbose=True))
        return None

      # The HTTPError was due to a server error, so retry the attempt.
      logging.warning('Able to connect to %s on attempt %d.\n%s',
                      request.get_full_url(), attempt,
                      self._format_exception(e))
      last_error = e

    except (urllib2.URLError, httplib.HTTPException,
            socket.timeout, ssl.SSLError) as e:
      logging.warning('Unable to open url %s on attempt %d.\n%s',
                      request.get_full_url(), attempt,
                      self._format_exception(e))
      last_error = e

    # Only sleep if we are going to try again: stop right here when this was
    # the last allowed attempt or when the time budget is spent.
    if max_attempts and attempt + 1 >= max_attempts:
      break
    remaining = None
    if timeout:
      remaining = timeout - (self._now() - start)
      if remaining <= 0:
        break
    self.sleep_before_retry(attempt, remaining)

  if request is None:
    # The limits were hit before a single request could be built.
    logging.error('Unable to open url: no attempt was made.')
    return None

  logging.error('Unable to open given url, %s, after %d attempts.\n%s',
                request.get_full_url(), attempts_made,
                self._format_exception(last_error, verbose=True))
  return None
702
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000703 def _url_open(self, request, timeout=None):
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000704 """Low level method to execute urllib2.Request's.
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000705
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000706 To be mocked in tests.
707 """
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000708 if timeout is not None:
709 return self.opener.open(request, timeout=timeout)
710 else:
711 # Leave original default value for |timeout|. It's nontrivial.
712 return self.opener.open(request)
maruel@chromium.orgef333122013-03-12 20:36:40 +0000713
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000714 @staticmethod
715 def _now():
716 """To be mocked in tests."""
717 return time.time()
718
@staticmethod
def calculate_sleep_before_retry(attempt, max_duration):
  """Returns the number of seconds to wait before retry number |attempt|.

  The delay is 1.5^(attempt - 1) plus a random jitter in [0.0, 1.5). It is
  capped at 10 seconds and, when |max_duration| is truthy, at |max_duration|
  too.
  """
  # Longest single sleep. We're hammering a cloud-distributed service, it'll
  # survive.
  longest_sleep = 10.
  # random.random() returns [0.0, 1.0), so the jitter keeps the first waits
  # relatively short.
  jitter = random.random() * 1.5
  backoff = math.pow(1.5, (attempt - 1))
  delay = jitter + backoff
  assert delay > 0.1
  delay = min(longest_sleep, delay)
  return min(max_duration, delay) if max_duration else delay
732
@classmethod
def sleep_before_retry(cls, attempt, max_duration):
  """Blocks the calling thread before retry number |attempt|.

  The duration is whatever cls.calculate_sleep_before_retry() returns for the
  same arguments.

  To be mocked in tests.
  """
  delay = cls.calculate_sleep_before_retry(attempt, max_duration)
  time.sleep(delay)
maruel@chromium.orgef333122013-03-12 20:36:40 +0000740
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000741 @staticmethod
742 def _format_exception(exc, verbose=False):
743 """Given an instance of some exception raised by urlopen returns human
744 readable piece of text with detailed information about the error.
745 """
746 out = ['Exception: %s' % (exc,)]
747 if verbose:
748 if isinstance(exc, urllib2.HTTPError):
749 out.append('-' * 10)
750 if exc.hdrs:
751 for header, value in exc.hdrs.items():
752 if not header.startswith('x-'):
753 out.append('%s: %s' % (header.capitalize(), value))
754 out.append('')
755 out.append(exc.read() or '<empty body>')
756 out.append('-' * 10)
757 return '\n'.join(out)
758
maruel@chromium.orgef333122013-03-12 20:36:40 +0000759
class HttpResponse(object):
  """Response from HttpService.

  Wraps the raw response object and counts the bytes handed out by read() so
  that a read timeout can be logged with some context.
  """

  def __init__(self, url_response, url):
    # Underlying response; |headers| and read() are the only things used.
    self._url_response = url_response
    # Only used in log messages.
    self._url = url
    # Number of bytes returned by read() so far.
    self._read = 0

  @property
  def content_length(self):
    """Total length to the response or None if not known in advance."""
    raw_length = self._url_response.headers.get('Content-Length')
    if raw_length is None:
      return None
    return int(raw_length)

  def read(self, size=None):
    """Reads up to |size| bytes from the stream and returns them.

    If |size| is None reads all available bytes.

    Raises TimeoutError on read timeout; socket.timeout and ssl.SSLError are
    both reported that way.
    """
    try:
      chunk = self._url_response.read(size)
    except (socket.timeout, ssl.SSLError) as e:
      logging.error('Timeout while reading from %s, read %d of %s: %s',
                    self._url, self._read, self.content_length, e)
      raise TimeoutError(e)
    self._read += len(chunk)
    return chunk
789
790
class AppEngineService(HttpService):
  """HttpService with authentication support for app engine based services."""

  # Shared by all instances: this lock ensures that user won't be confused
  # with multiple concurrent login prompts.
  _auth_lock = threading.Lock()

  def __init__(self, urlhost, email=None, password=None):
    super(AppEngineService, self).__init__(urlhost)
    self.email = email
    self.password = password
    # Created on demand by get_credentials().
    self._keyring = None

  def authenticate(self):
    """Authenticates in the app engine application.

    Returns True on success, False right away when the 'upload' module is not
    available.
    """
    if not upload:
      logging.error('\'upload\' module is missing, '
                    'app engine authentication is disabled.')
      return False

    # Captured by the nested class below, whose own |self| is the RPC server.
    jar = self.cookie_jar
    persist_jar = self.save_cookie_jar

    # RPC server that uses AuthenticationSupport's cookie jar.
    class AuthServer(upload.AbstractRpcServer):
      def _GetOpener(self):
        # Authentication code needs to know about 302 response.
        # So make OpenerDirector without HTTPRedirectHandler.
        handlers = (
            urllib2.ProxyHandler(),
            urllib2.UnknownHandler(),
            urllib2.HTTPHandler(),
            urllib2.HTTPDefaultErrorHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPErrorProcessor(),
            urllib2.HTTPCookieProcessor(jar),
        )
        director = urllib2.OpenerDirector()
        for handler in handlers:
          director.add_handler(handler)
        return director

      def PerformAuthentication(self):
        self._Authenticate()
        persist_jar()
        return self.authenticated

    with AppEngineService._auth_lock:
      server = AuthServer(self.urlhost, self.get_credentials)
      return server.PerformAuthentication()

  def get_credentials(self):
    """Called during authentication process to get the credentials.

    May be called multiple times if authentication fails.
    Returns tuple (email, password).
    """
    # 'authenticate' calls this only if 'upload' is present.
    # Ensure other callers (if any) fail non-cryptically if 'upload' is missing.
    assert upload, '\'upload\' module is required for this to work'
    if self.email and self.password:
      return (self.email, self.password)
    if not self._keyring:
      self._keyring = upload.KeyringCreds(
          self.urlhost, self.urlhost, self.email)
    return self._keyring.GetUserCredentials()
853
854
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save.

  Both methods hold the jar's own lock while touching the file and are best
  effort: filesystem errors are logged, never raised.
  """

  # Cookie files hold credentials: readable and writable by the owner only.
  _FILE_MODE = 0o600

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists, creates it empty otherwise.

    In both cases the file mode is then reset to owner read/write.

    Arguments:
    - filename: file to load, defaults to self.filename. '~' is expanded.
    - ignore_discard, ignore_expires: passed through to MozillaCookieJar.load.
    """
    filename = os.path.expanduser(filename or self.filename)
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(
              self, filename, ignore_discard, ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError) as e:
          # Unreadable or malformed file (e.g. the empty one created below on
          # a previous run): carry on with the cookies already in memory.
          logging.debug('Failed to load cookies from %s: %s', filename, e)
      else:
        try:
          os.close(os.open(filename, os.O_CREAT, self._FILE_MODE))
        except OSError:
          logging.error('Failed to create %s', filename)
      try:
        os.chmod(filename, self._FILE_MODE)
      except OSError:
        logging.error('Failed to fix mode for %s', filename)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it.

    Arguments:
    - filename: file to write, defaults to self.filename.
    - ignore_discard, ignore_expires: passed through to MozillaCookieJar.save.
    """
    # |filename| is None when the default file is used; log the real target.
    target = filename or self.filename
    logging.debug('Saving cookies to %s', target)
    with self._cookies_lock:
      try:
        cookielib.MozillaCookieJar.save(
            self, filename, ignore_discard, ignore_expires)
      except (OSError, IOError):
        # On python 2 a failed open() raises IOError, which is not an OSError.
        logging.error('Failed to save %s', target)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000891
892
def valid_file(filepath, size):
  """Returns True if |filepath| appears valid; only its size is checked.

  When |size| is UNKNOWN_FILE_SIZE the file is not even stat'ed and is
  reported as valid. A mismatch is logged as a warning.
  """
  if size != UNKNOWN_FILE_SIZE:
    on_disk = os.stat(filepath).st_size
    if size != on_disk:
      logging.warning(
          'Found invalid item %s; %d != %d',
          os.path.basename(filepath), on_disk, size)
      return False
  return True
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000905
906
class Profiler(object):
  """Context manager logging, at info level, how long its body took.

  |name| identifies the section in the log line.
  """

  def __init__(self, name):
    self.name = name
    # Set when the context is entered.
    self.start_time = None

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
920
921
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priorities of items are equal, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The low 8 bits are reserved for the retry counter, see
  # _task_executer().
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    """|destination_root| is either an http(s) url or a local path."""
    # Function to fetch a remote object or upload to a remote location.
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = threading_utils.ThreadPool(
        self.INITIAL_WORKERS, self.MAX_WORKERS, 0, 'remote')

  def join(self):
    """Blocks until the queue is empty."""
    return self._pool.join()

  def close(self):
    """Terminates all worker threads."""
    self._pool.close()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first. |priority| must leave the
    internal retry bits clear, i.e. be one of LOW, MED or HIGH.

    Thread-safe.
    """
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Queues the task; unlike add_item(), accepts retry-adjusted priorities."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns one result from the underlying thread pool."""
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success and None when the item was queued again for a
    retry. Once RETRIES is exhausted the IOError is re-raised.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Remove unfinished download.
      if os.path.exists(dest):
        os.remove(dest)
      # Retry a few times, lowering the priority. The number of retries done
      # so far is stored in the low bits of |priority|.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a callable(item, dest) retrieving one object from the remote.

    An http(s) url selects the downloader, anything else is handled as a
    directory to copy from.
    """
    if re.match(r'^https?://.+$', file_or_url):
      return functools.partial(self._download_file, file_or_url)
    else:
      return functools.partial(self._copy_file, file_or_url)

  @staticmethod
  def _download_file(base_url, item, dest):
    """Downloads the zlib-compressed |base_url| + |item| into |dest|.

    The content is decompressed on the fly. Every failure, including corrupted
    data and httplib errors, is surfaced as IOError.
    """
    # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
    # easy.
    try:
      zipped_source = base_url + item
      logging.debug('download_file(%s)', zipped_source)

      # Because the app engine DB is only eventually consistent, retry
      # 404 errors because the file might just not be visible yet (even
      # though it has been uploaded).
      connection = url_open(zipped_source, retry_404=True,
                            read_timeout=DOWNLOAD_READ_TIMEOUT)
      if not connection:
        raise IOError('Unable to open connection to %s' % zipped_source)

      content_length = connection.content_length
      decompressor = zlib.decompressobj()
      # Number of compressed bytes processed so far, for error reporting.
      size = 0
      with open(dest, 'wb') as f:
        while True:
          chunk = connection.read(ZIPPED_FILE_CHUNK)
          if not chunk:
            break
          size += len(chunk)
          f.write(decompressor.decompress(chunk))
        # flush() returns whatever output the decompressor still holds. It is
        # part of the file, so write it instead of asserting it is empty
        # (asserts are stripped with -O, which would silently drop the data).
        f.write(decompressor.flush())
    except IOError as e:
      logging.error('Failed to download %s at %s.\n%s', item, dest, e)
      raise
    except httplib.HTTPException as e:
      msg = 'HTTPException while retrieving %s at %s.\n%s' % (item, dest, e)
      logging.error(msg)
      raise IOError(msg)
    except zlib.error as e:
      msg = 'Corrupted zlib for item %s. Processed %d of %s bytes.\n%s' % (
          item, size, content_length, e)
      logging.error(msg)

      # Testing seems to show that if a few machines are trying to download
      # the same blob, they can cause each other to fail. So if we hit a
      # zip error, this is the most likely cause (it only downloads some of
      # the data). Randomly sleep for between 5 and 25 seconds to try and
      # spread out the downloads.
      # TODO(csharp): Switch from blobstorage to cloud storage and see if
      # that solves the issue.
      sleep_duration = (random.random() * 20) + 5
      time.sleep(sleep_duration)

      raise IOError(msg)

  @staticmethod
  def _copy_file(base_path, item, dest):
    """Copies |base_path|/|item| to |dest|; no-op when both are the same."""
    source = os.path.join(base_path, item)
    if source == dest:
      logging.info('Source and destination are the same, no action required')
      return
    logging.debug('copy_file(%s, %s)', source, dest)
    shutil.copy(source, dest)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001067
1068
class CachePolicies(object):
  """Plain holder for the three cache retention limits."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    limits = (max_cache_size, min_free_space, max_items)
    self.max_cache_size, self.min_free_space, self.max_items = limits
1083
1084
class NoCache(object):
  """Drop-in replacement for the Cache class that does not cache anything.

  Files are downloaded straight into the target directory and then moved to
  where they are needed.
  """

  def __init__(self, target_directory, remote):
    self.target_directory = target_directory
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Queues |item| on the remote, then waits for one result."""
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Downloads the first key of |items| if it is missing, and returns it."""
    first = next(items.iterkeys())
    destination = self.path(first)
    if os.path.exists(destination):
      return first

    self.remote.add_item(Remote.MED, first, destination, UNKNOWN_FILE_SIZE)
    fetched = self.remote.get_one_result()
    assert fetched == first
    return first

  def path(self, item):
    """Returns where |item| lives inside the target directory."""
    return os.path.join(self.target_directory, item)
1113
1114
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001115class Cache(object):
1116 """Stateful LRU cache.
1117
1118 Saves its state as json file.
1119 """
1120 STATE_FILE = 'state.json'
1121
1122 def __init__(self, cache_dir, remote, policies):
1123 """
1124 Arguments:
1125 - cache_dir: Directory where to place the cache.
1126 - remote: Remote where to fetch items from.
1127 - policies: cache retention policies.
1128 """
1129 self.cache_dir = cache_dir
1130 self.remote = remote
1131 self.policies = policies
1132 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001133 # The tuple(file, size) are kept as an array in a LRU style. E.g.
1134 # self.state[0] is the oldest item.
1135 self.state = []
1136 self._state_need_to_be_saved = False
1137 # A lookup map to speed up searching.
1138 self._lookup = {}
1139 self._lookup_is_stale = True
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001140
1141 # Items currently being fetched. Keep it local to reduce lock contention.
1142 self._pending_queue = set()
1143
1144 # Profiling values.
1145 self._added = []
1146 self._removed = []
1147 self._free_disk = 0
1148
maruel@chromium.org770993b2012-12-11 17:16:48 +00001149 with Profiler('Setup'):
1150 if not os.path.isdir(self.cache_dir):
1151 os.makedirs(self.cache_dir)
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001152 if os.path.isfile(self.state_file):
1153 try:
1154 self.state = json.load(open(self.state_file, 'r'))
1155 except (IOError, ValueError), e:
1156 # Too bad. The file will be overwritten and the cache cleared.
1157 logging.error(
1158 'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
1159 self._state_need_to_be_saved = True
1160 if (not isinstance(self.state, list) or
1161 not all(
1162 isinstance(i, (list, tuple)) and len(i) == 2
1163 for i in self.state)):
1164 # Discard.
1165 self._state_need_to_be_saved = True
1166 self.state = []
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001167
maruel@chromium.org770993b2012-12-11 17:16:48 +00001168 # Ensure that all files listed in the state still exist and add new ones.
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001169 previous = set(filename for filename, _ in self.state)
1170 if len(previous) != len(self.state):
1171 logging.warning('Cache state is corrupted, found duplicate files')
1172 self._state_need_to_be_saved = True
1173 self.state = []
1174
1175 added = 0
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001176 for filename in os.listdir(self.cache_dir):
1177 if filename == self.STATE_FILE:
1178 continue
1179 if filename in previous:
1180 previous.remove(filename)
1181 continue
1182 # An untracked file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001183 if not RE_IS_SHA1.match(filename):
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001184 logging.warning('Removing unknown file %s from cache', filename)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001185 os.remove(self.path(filename))
maruel@chromium.org770993b2012-12-11 17:16:48 +00001186 continue
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001187 # Insert as the oldest file. It will be deleted eventually if not
1188 # accessed.
1189 self._add(filename, False)
1190 logging.warning('Add unknown file %s to cache', filename)
1191 added += 1
maruel@chromium.org770993b2012-12-11 17:16:48 +00001192
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001193 if added:
1194 logging.warning('Added back %d unknown files', added)
maruel@chromium.org770993b2012-12-11 17:16:48 +00001195 if previous:
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001196 logging.warning('Removed %d lost files', len(previous))
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001197 # Set explicitly in case self._add() wasn't called.
1198 self._state_need_to_be_saved = True
1199 # Filter out entries that were not found while keeping the previous
1200 # order.
1201 self.state = [
1202 (filename, size) for filename, size in self.state
1203 if filename not in previous
1204 ]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001205 self.trim()
1206
1207 def __enter__(self):
1208 return self
1209
1210 def __exit__(self, _exc_type, _exec_value, _traceback):
1211 with Profiler('CleanupTrimming'):
1212 self.trim()
1213
1214 logging.info(
maruel@chromium.org5fd6f472012-12-11 00:26:08 +00001215 '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001216 logging.info(
maruel@chromium.org5fd6f472012-12-11 00:26:08 +00001217 '%5d (%8dkb) current',
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001218 len(self.state),
1219 sum(i[1] for i in self.state) / 1024)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001220 logging.info(
maruel@chromium.org5fd6f472012-12-11 00:26:08 +00001221 '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
1222 logging.info(' %8dkb free', self._free_disk / 1024)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001223
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001224 def remove_file_at_index(self, index):
1225 """Removes the file at the given index."""
1226 try:
1227 self._state_need_to_be_saved = True
1228 filename, size = self.state.pop(index)
1229 # If the lookup was already stale, its possible the filename was not
1230 # present yet.
1231 self._lookup_is_stale = True
1232 self._lookup.pop(filename, None)
1233 self._removed.append(size)
1234 os.remove(self.path(filename))
1235 except OSError as e:
1236 logging.error('Error attempting to delete a file\n%s' % e)
1237
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001238 def remove_lru_file(self):
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001239 """Removes the last recently used file."""
1240 self.remove_file_at_index(0)
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001241
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001242 def trim(self):
1243 """Trims anything we don't know, make sure enough free space exists."""
1244 # Ensure maximum cache size.
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001245 if self.policies.max_cache_size and self.state:
1246 while sum(i[1] for i in self.state) > self.policies.max_cache_size:
1247 self.remove_lru_file()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001248
1249 # Ensure maximum number of items in the cache.
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001250 if self.policies.max_items and self.state:
1251 while len(self.state) > self.policies.max_items:
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001252 self.remove_lru_file()
1253
1254 # Ensure enough free space.
1255 self._free_disk = get_free_space(self.cache_dir)
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001256 trimmed_due_to_space = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001257 while (
1258 self.policies.min_free_space and
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001259 self.state and
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001260 self._free_disk < self.policies.min_free_space):
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001261 trimmed_due_to_space = True
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001262 self.remove_lru_file()
1263 self._free_disk = get_free_space(self.cache_dir)
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001264 if trimmed_due_to_space:
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001265 total = sum(i[1] for i in self.state)
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001266 logging.warning(
1267 'Trimmed due to not enough free disk space: %.1fkb free, %.1fkb '
1268 'cache (%.1f%% of its maximum capacity)',
1269 self._free_disk / 1024.,
1270 total / 1024.,
1271 100. * self.policies.max_cache_size / float(total),
1272 )
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001273 self.save()
1274
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001275 def retrieve(self, priority, item, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001276 """Retrieves a file from the remote, if not already cached, and adds it to
1277 the cache.
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001278
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001279 If the file is in the cache, verifiy that the file is valid (i.e. it is
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001280 the correct size), retrieving it again if it isn't.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001281 """
1282 assert not '/' in item
1283 path = self.path(item)
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001284 self._update_lookup()
1285 index = self._lookup.get(item)
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001286
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001287 if index is not None:
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001288 if not valid_file(self.path(item), size):
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001289 self.remove_file_at_index(index)
1290 index = None
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001291 else:
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001292 assert index < len(self.state)
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001293 # Was already in cache. Update it's LRU value by putting it at the end.
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001294 self._state_need_to_be_saved = True
1295 self._lookup_is_stale = True
1296 self.state.append(self.state.pop(index))
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001297
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001298 if index is None:
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001299 if item in self._pending_queue:
1300 # Already pending. The same object could be referenced multiple times.
1301 return
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001302 # TODO(maruel): It should look at the free disk space, the current cache
1303 # size and the size of the new item on every new item:
1304 # - Trim the cache as more entries are listed when free disk space is low,
1305 # otherwise if the amount of data downloaded during the run > free disk
1306 # space, it'll crash.
1307 # - Make sure there's enough free disk space to fit all dependencies of
1308 # this run! If not, abort early.
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +00001309 self.remote.add_item(priority, item, path, size)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001310 self._pending_queue.add(item)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001311
1312 def add(self, filepath, obj):
1313 """Forcibly adds a file to the cache."""
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001314 self._update_lookup()
1315 if not obj in self._lookup:
maruel@chromium.orgba6489b2013-07-11 20:23:33 +00001316 link_file(self.path(obj), filepath, HARDLINK_WITH_FALLBACK)
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001317 self._add(obj, True)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001318
1319 def path(self, item):
1320 """Returns the path to one item."""
1321 return os.path.join(self.cache_dir, item)
1322
1323 def save(self):
1324 """Saves the LRU ordering."""
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001325 if self._state_need_to_be_saved:
1326 json.dump(self.state, open(self.state_file, 'wb'), separators=(',',':'))
1327 self._state_need_to_be_saved = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001328
1329 def wait_for(self, items):
1330 """Starts a loop that waits for at least one of |items| to be retrieved.
1331
1332 Returns the first item retrieved.
1333 """
1334 # Flush items already present.
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001335 self._update_lookup()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001336 for item in items:
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001337 if item in self._lookup:
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001338 return item
1339
1340 assert all(i in self._pending_queue for i in items), (
1341 items, self._pending_queue)
1342 # Note that:
1343 # len(self._pending_queue) ==
1344 # ( len(self.remote._workers) - self.remote._ready +
1345 # len(self._remote._queue) + len(self._remote.done))
1346 # There is no lock-free way to verify that.
1347 while self._pending_queue:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001348 item = self.remote.get_one_result()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001349 self._pending_queue.remove(item)
vadimsh@chromium.orga40428e2013-07-04 15:43:14 +00001350 self._add(item, True)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001351 if item in items:
1352 return item
1353
def _add(self, item, at_end):
  """Records |item| and its on-disk size in the internal state.

  With |at_end| set, the entry goes to the end of self.state and
  self._lookup is kept in sync. Otherwise the entry is put at the front,
  which shifts every index: self._lookup is flagged as stale and
  self._update_lookup() must be called before it is used again.
  """
  entry = (item, os.path.getsize(self.path(item)))
  self._added.append(entry[1])
  self._state_need_to_be_saved = True
  if not at_end:
    self._lookup_is_stale = True
    self.state.insert(0, entry)
    return
  self.state.append(entry)
  self._lookup[item] = len(self.state) - 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001369
def _update_lookup(self):
  """Rebuilds self._lookup from self.state if it was flagged as stale.

  The lookup maps each entry's name to its index in self.state. Nothing is
  done when the lookup is already up to date.
  """
  if not self._lookup_is_stale:
    return
  rebuilt = {}
  for position, (name, _) in enumerate(self.state):
    rebuilt[name] = position
  self._lookup = rebuilt
  self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001375
1376
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    # Pass arguments to the logger instead of pre-formatting so the string
    # is only built when the message is actually emitted.
    logging.debug('IsolatedFile(%s)', obj_hash)
    self.obj_hash = obj_hash
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing
    # a .isolated file in the hash table, is important, as the later ones are
    # not processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the
    json data.

    |content| is handed to load_isolated(); one child IsolatedFile is created
    for each hash listed under the 'includes' key. Must be called at most
    once per instance.
    """
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    self.children = [IsolatedFile(i) for i in self.data.get('includes', [])]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.

    Does nothing when this file is not loaded yet or when its files were
    already requested. self.can_fetch must be set before calling.
    """
    assert self.can_fetch
    if not self._is_parsed or self.files_fetched:
      return
    logging.debug('fetch_files(%s)', self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath not in files:
        files[filepath] = properties
        if 'h' in properties:
          # Preemptively request files.
          logging.debug('fetching %s', filepath)
          cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1431
1432
class Settings(object):
  """Results of a completely parsed .isolated file."""

  def __init__(self):
    # First non-empty 'command' found while walking the tree.
    self.command = []
    # Maps a file path to its properties; filled by IsolatedFile.fetch_files.
    self.files = {}
    # Both stay None until a .isolated file provides a value; load() replaces
    # a remaining None by a default once the whole tree is processed.
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important
    so that a file in an included .isolated file that is overridden by an
    embedding .isolated file is not fetched needlessly. The includes are
    fetched in one pass and the files are fetched as soon as all the ones on
    the left-side of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for
    both deep and wide trees. A deep one is a long link of .isolated files
    referenced one at a time by one item in 'includes'. A wide one has a large
    number of 'includes' in a single .isolated file. 'left' is defined as an
    included .isolated file earlier in the 'includes' list. So the order of
    the elements in 'includes' is important.

    Raises ConfigError if the same .isolated hash is requested twice.
    """
    self.root = IsolatedFile(root_isolated_hash)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

    retrieve(self.root)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the handle explicitly rather than leaving it to the garbage
      # collector; one file is opened per .isolated in the tree.
      with open(cache.path(item_hash), 'r') as f:
        content = f.read()
      item.load(content)
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(cache, self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False

  def _traverse_tree(self, cache, node):
    """Requests the files of every node of the tree that is ready for it.

    A node is processed only once its can_fetch flag is set. Among the
    children of a processed node, at most one not-yet-fetchable child is
    unlocked per call, and the children after it are left untouched.
    """
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(cache, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(cache, i)

  def _update_self(self, cache, node):
    """Requests the files of |node| and merges its properties in.

    A value that is already set is never overridden, so the first node
    processed that provides a property wins.
    """
    node.fetch_files(cache, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      self.command = node.data['command']
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1524
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001525
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files.

  |files| is a sized iterable of file paths (a dict keyed by path works);
  each path is taken relative to |base_directory|. Every parent directory
  of every file is created. Directories that already exist are left alone,
  so the function can be run against a partially populated tree.

  Raises OSError if a directory cannot be created for any other reason.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect every directory to create, including all intermediate parents.
  directories = set()
  for f in files:
    item = os.path.dirname(f)
    # Once an ancestor is recorded, all of its own ancestors are too, so the
    # walk can stop there. This also guarantees termination for an absolute
    # path, where dirname() of the root is the root itself.
    while item and item not in directories:
      directories.add(item)
      item = os.path.dirname(item)
  # A parent is a strict prefix of its children, so it sorts before them.
  for d in sorted(directories):
    path = os.path.join(base_directory, d)
    try:
      os.mkdir(path)
    except OSError:
      # Only swallow the error when the directory is really there.
      if not os.path.isdir(path):
        raise
1538
1539
def create_links(base_directory, files):
  """Creates the symlinks described by |files| under |base_directory|.

  |files| is an iterable of (filepath, properties) pairs. Only entries with
  an 'l' property are symlinks; everything else is skipped. On Windows the
  symlinks are not created and a warning is logged for each of them.
  """
  on_windows = sys.platform == 'win32'
  for relpath, props in files:
    if 'l' in props:
      if on_windows:
        # TODO(maruel): Create junctions or empty text files similar to what
        # cygwin do?
        logging.warning('Ignoring symlink %s', relpath)
      else:
        link_path = os.path.join(base_directory, relpath)
        # symlink doesn't exist on Windows. So the 'link' property should
        # never be specified for windows .isolated file.
        os.symlink(props['l'], link_path)  # pylint: disable=E1101
        # The mode is applied to the link itself, and only where the platform
        # exposes os.lchmod.
        set_mode = getattr(os, 'lchmod', None) if 'm' in props else None
        if set_mode:
          set_mode(link_path, props['m'])
1558
1559
def setup_commands(base_directory, cwd, cmd):
  """Prepares the working directory and the command line to run the test.

  |cwd| must be relative; it is resolved against |base_directory| and
  created when missing. The first element of |cmd| is rewritten in place to
  use the native path separator, then the command goes through
  fix_python_path().

  Returns a (working directory, command) pair.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  work_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(work_dir):
    os.makedirs(work_dir)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0]
  cmd[0] = executable.replace('/', os.path.sep)
  return work_dir, fix_python_path(cmd)
1574
1575
def generate_remaining_files(files):
  """Groups the files still to be downloaded by content hash.

  |files| is an iterable of (filepath, properties) pairs. Entries without an
  'h' property are ignored.

  Returns a dict mapping each hash to the list of (filepath, properties)
  tuples sharing it, in the order they were seen.
  """
  by_hash = {}
  for relpath, properties in files:
    if 'h' not in properties:
      continue
    digest = properties['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((relpath, properties))
  return by_hash
1584
1585
def download_test_data(isolated_hash, target_directory, remote):
  """Fetches an isolated tree into |target_directory| without running it.

  The .isolated file named by |isolated_hash| and everything it includes is
  loaded through a NoCache wrapping Remote(remote). Directories and symlinks
  are then created and each downloaded file is moved or copied to its final
  location.

  Returns 0 on success, or 1 when the loaded settings contain no command.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  parsed = Settings()
  store = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds') as _prof:
    parsed.load(store, isolated_hash)

  if not parsed.command:
    sys.stderr.write('No command to run\n')
    return 1

  with Profiler('GetRest') as _prof:
    create_directories(target_directory, parsed.files)
    create_links(target_directory, parsed.files.iteritems())

    cwd, cmd = setup_commands(
        target_directory, parsed.relative_cwd, parsed.command[:])

    remaining = generate_remaining_files(parsed.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      digest = store.wait_for(remaining)
      destinations = remaining.pop(digest)
      last_index = len(destinations) - 1

      for index, (relpath, props) in enumerate(destinations):
        destination = os.path.join(target_directory, relpath)
        logging.info(store.path(digest))

        # The downloaded file is copied to every destination but the last
        # one, where it is moved instead.
        if index == last_index:
          os.rename(store.path(digest), destination)
        else:
          shutil.copyfile(store.path(digest), destination)

        if 'm' in props and sys.platform != 'win32':
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in
          # that case.
          os.chmod(destination, props['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        msg = '%d files remaining...' % len(remaining)
        print(msg)
        logging.info(msg)
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1645
1646
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  |isolated_hash| is either the hash of a .isolated file or, when it does not
  match RE_IS_SHA1, the path of a local .isolated file which is then added to
  the cache under its sha-1. The temporary directory is always removed before
  returning.

  Returns the exit code of the command, or 1 when the loaded settings
  contain no command. An OSError raised while starting the command is
  reported on stderr and re-raised.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest. The handle is closed explicitly instead of being left to
          # the garbage collector.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        sys.stderr.write('No command to run\n')
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
          while remaining:
            detector.ping()
            obj = cache.wait_for(remaining)
            for filepath, properties in remaining.pop(obj):
              outfile = os.path.join(outdir, filepath)
              link_file(outfile, cache.path(obj), HARDLINK)
              if 'm' in properties:
                # It's not set on Windows.
                os.chmod(outfile, properties['m'])

            if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
              msg = '%d files remaining...' % len(remaining)
              print(msg)
              logging.info(msg)
              last_update = time.time()

      if settings.read_only:
        logging.info('Making files read only')
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s', cmd, cwd)

      # TODO(csharp): This should be specified somewhere else.
      # TODO(vadimsh): Pass it via 'env_vars' in manifest.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE',
                     os.path.join(MAIN_DIR, RUN_TEST_CASES_LOG))
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        sys.stderr.write('Failed to run %s; cwd=%s\n' % (cmd, cwd))
        raise
    finally:
      rmtree(outdir)
1718
1719
class OptionParserWithLogging(optparse.OptionParser):
  """Option parser that also configures logging.

  Adds the --verbose and --log_file options and sets up the root logger
  according to them each time parse_args() is called.
  """

  def __init__(self, verbose=0, log_file=None, **kwargs):
    """|verbose| and |log_file| are the defaults of the matching options.

    Unless a description is given, the docstring of the __main__ module is
    used.
    """
    if 'description' not in kwargs:
      kwargs['description'] = sys.modules['__main__'].__doc__
    optparse.OptionParser.__init__(self, **kwargs)
    self.add_option(
        '-v', '--verbose', action='count', default=verbose,
        help='Use multiple times to increase verbosity')
    self.add_option(
        '-l', '--log_file', default=log_file,
        help='The name of the file to store rotating log details.')

  def parse_args(self, *args, **kwargs):
    """Parses the arguments, then attaches the logging handlers.

    A console handler is always added, at a level picked from the verbosity
    count. When a log file is requested, a rotating file handler logging at
    DEBUG level is added too.

    Returns the (options, args) pair from optparse.
    """
    options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)
    root = logging.getLogger()

    # Any verbosity past the last entry maps to DEBUG.
    by_verbosity = (logging.ERROR, logging.INFO, logging.DEBUG)
    level = by_verbosity[min(len(by_verbosity) - 1, options.verbose)]

    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter(
        '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
    console.setLevel(level)
    root.setLevel(level)
    root.addHandler(console)

    if not options.log_file:
      return options, args

    # This is necessary otherwise attached handler will miss the messages.
    root.setLevel(logging.DEBUG)

    rotating_file = logging.handlers.RotatingFileHandler(
        options.log_file,
        maxBytes=10 * 1024 * 1024,
        backupCount=5,
        encoding='utf-8')
    # log files are always at DEBUG level.
    rotating_file.setLevel(logging.DEBUG)
    rotating_file.setFormatter(logging.Formatter(
        '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
    root.addHandler(rotating_file)

    return options, args
1763
1764
def main():
  """Parses the command line, then downloads or runs the isolated test.

  Exactly one of --isolated and --hash must be given and no positional
  argument is accepted. With --download the files are only fetched into the
  given directory; otherwise the test is run from a temporary directory, any
  exception being logged and turned into an exit code of 1.

  Returns the process exit code.
  """
  disable_buffering()
  parser = OptionParserWithLogging(
      usage='%prog <options>', log_file=RUN_ISOLATED_LOG_FILE)

  download_group = optparse.OptionGroup(parser, 'Download')
  download_group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(download_group)

  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  source_group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  source_group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  parser.add_option_group(source_group)

  gigabyte = 1024 * 1024 * 1024
  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  cache_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20 * gigabyte,
      help='Trim if the cache gets larger than this value, default=%default')
  cache_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=2 * gigabyte,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  cache_group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(cache_group)

  options, args = parser.parse_args()

  # Each usage error is also sent to the log before parser.error() exits.
  if bool(options.isolated) == bool(options.hash):
    message = 'One and only one of --isolated or --hash is required.'
    logging.debug(message)
    parser.error(message)
  if args:
    message = 'Unsupported args %s' % ' '.join(args)
    logging.debug(message)
    parser.error(message)

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  isolated_ref = options.isolated or options.hash
  if options.download:
    return download_test_data(isolated_ref, options.download, options.remote)

  try:
    return run_tha_test(isolated_ref, options.cache, options.remote, policies)
  except Exception as e:
    # Make sure any exception is logged.
    logging.exception(e)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001847
1848
if __name__ == '__main__':
  # The default encoding has to be fixed before anything else runs, so that
  # we are always running with the correct encoding.
  fix_default_encoding()
  exit_code = main()
  sys.exit(exit_code)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001852 sys.exit(main())