blob: fd8feb9dd3b53b38e28ed7145e239a9e78721f22 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
vadimsh@chromium.org80f73002013-07-12 14:52:44 +000013import functools
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000014import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000015import httplib
maruel@chromium.orgedd25d02013-03-26 14:38:00 +000016import inspect
maruel@chromium.org2b2139a2013-04-30 20:14:58 +000017import itertools
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000018import json
csharp@chromium.orgbfb98742013-03-26 20:28:36 +000019import locale
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000020import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000021import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000022import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000023import optparse
24import os
25import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000026import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000027import re
28import shutil
vadimsh@chromium.org80f73002013-07-12 14:52:44 +000029import socket
30import ssl
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000031import stat
32import subprocess
33import sys
34import tempfile
35import threading
36import time
maruel@chromium.org97cd0be2013-03-13 14:01:36 +000037import traceback
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000038import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000039import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000040import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000041import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000042
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000043from utils import zip_package
44
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000045# Try to import 'upload' module used by AppEngineService for authentication.
46# If it is not there, app engine authentication support will be disabled.
47try:
48 from third_party import upload
49 # Hack out upload logging.info()
50 upload.logging = logging.getLogger('upload')
51 # Mac pylint choke on this line.
52 upload.logging.setLevel(logging.WARNING) # pylint: disable=E1103
53except ImportError:
54 upload = None
55
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000056
# Absolute location of this module. |__file__| can be empty when running from a
# zip on Mac, in which case there is no path to report and this is None.
if __file__:
  THIS_FILE_PATH = os.path.abspath(__file__)
else:
  THIS_FILE_PATH = None

# Parent directory of this module; it might live inside a zip package.
if __file__:
  BASE_DIR = os.path.dirname(THIS_FILE_PATH)
else:
  BASE_DIR = None

# Directory holding the script that is currently being run.
MAIN_DIR = os.path.dirname(
    os.path.abspath(zip_package.get_main_script_path()))
65
# Actions understood by link_file().
HARDLINK = 1
HARDLINK_WITH_FALLBACK = 2
SYMLINK = 3
COPY = 4

# Matches a string made of exactly 40 hexadecimal digits (a sha-1 hex digest).
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# Placeholder size used when the real file size is not known, which is
# generally the case for .isolated files.
UNKNOWN_FILE_SIZE = None

# Number of bytes read at a time when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 1024 * 16

# File name of this script's log.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# File name of the log used for the run_test_cases.py command.
RUN_TEST_CASES_LOG = 'run_test_cases.log'

# Seconds to wait between two progress log statements while the required files
# are being retrieved, so the user (or buildbot) can tell the program is still
# alive.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# Longest expected delay (in seconds) between two successive file fetches in
# run_tha_test. Past that delay a deadlock is suspected and the stack frames of
# all threads are dumped to the log.
DEADLOCK_TIMEOUT = 300

# Key under which the count of url open attempts is sent.
COUNT_KEY = 'UrlOpenAttempt'

# Default maximum number of attempts at opening a url before giving up.
URL_OPEN_MAX_ATTEMPTS = 30
# Default total duration (in seconds) allowed for retries.
URL_OPEN_TIMEOUT = 360.0

# Read timeout (in seconds) for downloads from the isolate storage. When the
# server stays silent for longer than this, the whole download is aborted.
DOWNLOAD_READ_TIMEOUT = 60

# Global (for now) cache used by get_http_service():
# server URL (http://example.com) -> HttpService instance.
_http_services = {}
_http_services_lock = threading.Lock()

# sys.platform -> flavor name, used by get_flavor().
FLAVOR_MAPPING = {
  'cygwin': 'win',
  'win32': 'win',
  'darwin': 'mac',
  'sunos5': 'solaris',
  'freebsd7': 'freebsd',
  'freebsd8': 'freebsd',
}
120
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000121
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be loaded or fails validation."""
125
126
class MappingError(OSError):
  """Raised when the tree of files could not be recreated."""
130
131
class TimeoutError(IOError):
  """Raised when reading an HTTP response times out.

  The exception that caused the timeout, if any, is kept in |inner_exc| and
  its string form is used as this exception's message; otherwise the message
  is 'Timeout'.
  """

  def __init__(self, inner_exc=None):
    message = str(inner_exc or 'Timeout')
    super(TimeoutError, self).__init__(message)
    self.inner_exc = inner_exc
138
139
def get_as_zip_package(executable=True):
  """Builds a ZipPackage holding this module and all of its dependencies.

  When |executable| is True, this file is stored as __main__.py so that the
  resulting zip package can be run directly by python; otherwise it is stored
  under its default name.

  The 'third_party' and 'utils' directories located next to this file are
  added to the package as well.
  """
  # Creating a zip package while already running from a zip package is
  # unsupported and probably unneeded.
  assert not zip_package.is_zipped_module(sys.modules[__name__])
  assert THIS_FILE_PATH
  assert BASE_DIR
  archive_name = '__main__.py' if executable else None
  package = zip_package.ZipPackage(root=BASE_DIR)
  package.add_python_file(THIS_FILE_PATH, archive_name)
  for subdir in ('third_party', 'utils'):
    package.add_directory(os.path.join(BASE_DIR, subdir))
  return package
156
157
def get_flavor():
  """Returns the flavor name of the running system.

  The name is looked up in FLAVOR_MAPPING from sys.platform; any platform that
  is not listed there is reported as 'linux'. Copied from
  gyp/pylib/gyp/common.py.
  """
  if sys.platform in FLAVOR_MAPPING:
    return FLAVOR_MAPPING[sys.platform]
  return 'linux'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000161
162
def fix_default_encoding():
  """Forces utf-8 as the default encoding and as the locale encoding.

  The python execution environment is lazy by default and falls back to the
  ascii encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Returns False when the default encoding is already utf-8, in which case
  nothing is modified, and True once the encoding and locales were updated.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Reloading sys regenerates setdefaultencoding().
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  # pylint: disable=E1101
  sys.setdefaultencoding('utf-8')

  category_names = [name for name in dir(locale) if name[0:3] == 'LC_']
  for name in category_names:
    category = getattr(locale, name)
    try:
      # Start from the user's default setting for this category.
      locale.setlocale(category, '')
    except locale.Error:
      continue
    try:
      lang = locale.getlocale(category)[0]
    except (TypeError, ValueError):
      continue
    if not lang:
      continue
    try:
      locale.setlocale(category, (lang, 'UTF-8'))
    except locale.Error:
      # The locale could not be switched in-process; record the wish in the
      # environment instead.
      os.environ[name] = lang + '.UTF-8'

  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    pass
  return True
201
202
class Unbuffered(object):
  """Wraps a file object so that it is flushed after each written newline.

  Any attribute other than write() is forwarded to the wrapped stream.
  """

  def __init__(self, stream):
    self.stream = stream

  def write(self, data):
    """Writes |data| and flushes the stream if |data| contains a newline."""
    self.stream.write(data)
    if '\n' not in data:
      return
    self.stream.flush()

  def __getattr__(self, attr):
    # Only called for attributes not found on this wrapper itself.
    return getattr(self.stream, attr)
215
216
def disable_buffering():
  """Makes stdout of this process and of its child processes unbuffered.

  Does nothing when the PYTHONUNBUFFERED environment variable is already set
  to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...; wrap the whole object instead.
  sys.stdout = Unbuffered(sys.stdout)
  # Set in the environment so child processes see it too.
  os.environ['PYTHONUNBUFFERED'] = 'x'
224
225
def num_processors():
  """Returns the number of processors, as a positive integer.

  multiprocessing.cpu_count() is tried first. Python on OSX 10.6 raises a
  NotImplementedError exception there, so os.sysconf() is used as a fallback.
  When neither works (some of the windows builders seem to get there), or when
  sysconf reports a non-positive value, 4 is returned as a guess.
  """
  # Only Exception is caught (not a bare except) so that KeyboardInterrupt and
  # SystemExit are never swallowed here.
  try:
    # Multiprocessing
    import multiprocessing
    return multiprocessing.cpu_count()
  except Exception:  # pylint: disable=W0703
    pass

  try:
    # Mac OS 10.6
    count = int(os.sysconf('SC_NPROCESSORS_ONLN'))  # pylint: disable=E1101
  except Exception:  # pylint: disable=W0703
    count = 0
  # Never report a nonsensical (zero or negative) processor count.
  return count if count > 0 else 4
242
243
def os_link(source, link_name):
  """Creates the hard link |link_name| pointing to |source|.

  Adds support for os.link() on Windows, where kernel32's CreateHardLinkW is
  called instead.

  Raises OSError on failure. On Windows the exception is built by
  ctypes.WinError() so it carries the system error code and message instead of
  being an empty OSError.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return

  if not ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0):
    # WinError() returns an OSError subclass, so callers catching OSError are
    # unaffected but now get a meaningful message to log.
    raise ctypes.WinError()
252
253
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and makes the copy readable by everyone.

  The copy is done with shutil.copy2(); the read bits for user, group and
  others are then added to whatever mode the copy ended up with.
  """
  shutil.copy2(infile, outfile)
  everyone_can_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  current_mode = os.stat(outfile).st_mode
  os.chmod(outfile, current_mode | everyone_can_read)
260
261
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile|; how it is done depends on |action|.

  |action| must be one of HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK or COPY:
  - COPY makes a world-readable copy.
  - SYMLINK creates a symlink, except on Windows where it is treated as a
    hardlink that fails over to a copy.
  - HARDLINK creates a hardlink and fails if that is not possible.
  - HARDLINK_WITH_FALLBACK creates a hardlink and falls back to a copy.

  Raises ValueError for an unknown |action| and MappingError when |infile| is
  missing, |outfile| already exists or a strict hardlink could not be made.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    in_size = os.stat(infile).st_size
    out_size = os.stat(outfile).st_size
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, in_size, out_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    # On windows, symlink are converted to hardlink and fails over to copy.
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  try:
    os_link(infile, outfile)
  except OSError as e:
    if action == HARDLINK:
      raise MappingError(
          'Failed to hardlink %s to %s: %s' % (infile, outfile, e))
    # Probably a different file system.
    logging.warning(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
291
292
def _set_write_bit(path, read_only):
  """Sets or clears the owner write bit on a file or directory.

  If |read_only| is True the mode is reduced to the owner's read and execute
  bits (mode & 0500); otherwise the owner's write bit is added (mode | 0200).

  Symlinks are never followed: the link itself is modified when os.lchmod() is
  available, and left untouched otherwise.
  """
  original_mode = os.lstat(path).st_mode
  if read_only:
    mode = original_mode & 0o500
  else:
    mode = original_mode | 0o200

  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  # The symlink test must use the mode returned by lstat(): masking with 0500
  # strips the file type bits, which would make S_ISLNK() always False and let
  # os.chmod() follow the link and modify its target.
  if stat.S_ISLNK(original_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s' % path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
310
311
def make_writable(root, read_only):
  """Toggles the writable bit on every entry below the directory |root|.

  |root| must be an absolute path. For each directory visited, its files are
  processed first, then its sub-directories. |root| itself is not modified.
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    for name in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, name), read_only)
321
322
def rmtree(root):
  """Wraps shutil.rmtree() to retry automatically on Windows.

  The tree is first made writable. On Windows the deletion is attempted up to
  3 times, sleeping 2, 4 then 6 seconds after each failure since a failure
  there usually means a subprocess of the test is still alive. Note that if
  the last attempt fails too, the error is not propagated.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in (1, 2, 3):
    try:
      shutil.rmtree(root)
      return
    except WindowsError:  # pylint: disable=E0602
      delay = attempt * 2
      sys.stderr.write(
          'The test has subprocess outliving it. Sleep %d seconds.' % delay +
          '\n')
      time.sleep(delay)
338
339
def is_same_filesystem(path1, path2):
  """Tells whether |path1| and |path2| live on the same filesystem.

  This is what makes hardlinking from one to the other possible. Both paths
  must be absolute; the device ids reported by os.stat() are compared.
  """
  for path in (path1, path2):
    assert os.path.isabs(path), path
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    if path1[0].lower() != path2[0].lower():
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
356
357
def get_free_space(path):
  """Returns the number of free bytes on the filesystem holding |path|.

  On Windows the total number of free bytes reported by GetDiskFreeSpaceExW is
  returned; elsewhere it is computed from os.statvfs() as the number of free
  blocks times the fragment size.
  """
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
368
369
def make_temp_dir(prefix, root_dir):
  """Creates a temporary directory on the same file system as |root_dir|.

  The system default temporary directory is used when it is on the same
  filesystem as |root_dir|; otherwise the directory is created next to
  |root_dir|, i.e. inside its parent directory. Returns the new path.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
376
377
def _is_sha1_hex(value):
  """Returns True if |value| is a string of exactly 40 hexadecimal digits."""
  # The type is checked first: re.match() raises TypeError on non-strings,
  # which would escape as something else than a ConfigError.
  return isinstance(value, basestring) and bool(RE_IS_SHA1.match(value))


def _check_isolated_file_entry(properties):
  """Raises ConfigError if the properties of one 'files' entry are invalid."""
  import numbers
  for name, prop in properties.iteritems():
    if name == 'l':
      if not isinstance(prop, basestring):
        raise ConfigError('Expected string, got %r' % prop)
    elif name in ('m', 's'):
      # numbers.Integral also accepts 'long', which is what json produces for
      # values that do not fit in a native int (e.g. big file sizes).
      if not isinstance(prop, numbers.Integral):
        raise ConfigError('Expected int, got %r' % prop)
    elif name == 'h':
      if not _is_sha1_hex(prop):
        raise ConfigError('Expected sha-1, got %r' % prop)
    else:
      raise ConfigError('Unknown subsubkey %s' % name)
  if 'h' in properties and 'l' in properties:
    raise ConfigError(
        'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
        properties)


def load_isolated(content, os_flavor=None):
  """Parses and validates the content of a .isolated file.

  Arguments:
    content: the raw json text of the .isolated file.
    os_flavor: flavor the 'os' key must be equal to; defaults to the value
               returned by get_flavor().

  Recognized top-level keys are 'command' (non-empty list of strings), 'files'
  (dict of path -> dict with keys 'l' link target, 'm' mode, 'h' sha-1 and
  's' size; 'h' and 'l' are mutually exclusive), 'includes' (non-empty list of
  sha-1), 'read_only' (bool), 'relative_cwd' (string) and 'os'.

  Returns the decoded json data as a dict.

  Raises ConfigError if the content is not valid json or does not follow the
  format above, including when an unknown key is present.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        _check_isolated_file_entry(subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not _is_sha1_hex(subvalue):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      expected_value = os_flavor or get_flavor()
      if value != expected_value:
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (expected_value, value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
456
457
def fix_python_path(cmd):
  """Returns a copy of |cmd| fixed up to call the right python executable.

  A leading 'python' is replaced by sys.executable, and sys.executable is
  prepended when the first item is a .py script. |cmd| itself is not modified.
  """
  program = cmd[0]
  if program == 'python':
    return [sys.executable] + cmd[1:]
  if program.endswith('.py'):
    return [sys.executable] + cmd[:]
  return cmd[:]
466
467
def url_open(url, **kwargs):
  """Attempts to open the given url multiple times.

  The url is split into its server and request parts, and the request is sent
  through the HttpService instance cached for that server. |kwargs| are
  forwarded as is to its request() method.

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns HttpResponse object, where the response may be read from, or None
  if it was unable to connect.
  """
  server, request_path = split_server_request_url(url)
  return get_http_service(server).request(request_path, **kwargs)
483
484
def url_read(url, **kwargs):
  """Attempts to open the given url multiple times and reads all of its data.

  Accepts the same arguments as url_open().

  Returns all the data read, or None if it was unable to connect or if reading
  the data timed out.
  """
  response = url_open(url, **kwargs)
  if response:
    try:
      return response.read()
    except TimeoutError:
      pass
  return None
499
500
def split_server_request_url(url):
  """Splits |url| into scheme+netloc and path+params+query+fragment.

  Returns a tuple (urlhost, urlpath), e.g. 'http://example.com/a?b=1' gives
  ('http://example.com', '/a?b=1').
  """
  scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
  urlhost = '%s://%s' % (scheme, netloc)
  urlpath = urlparse.urlunparse(['', '', path, params, query, fragment])
  return urlhost, urlpath
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000507
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000508
def get_http_service(urlhost):
  """Returns the HttpService instance able to send requests to |urlhost|.

  Instances are cached per server in the global |_http_services| map; a new
  AppEngineService is created the first time a given server is requested.
  """
  # Ensure consistency: the cache key is lower case, without trailing '/'.
  key = str(urlhost).lower().rstrip('/')
  with _http_services_lock:
    service = _http_services.get(key)
    if service:
      return service
    service = _http_services[key] = AppEngineService(key)
    return service
521
522
523class HttpService(object):
524 """Base class for a class that provides an API to HTTP based service:
525 - Provides 'request' method.
526 - Supports automatic request retries.
527 - Supports persistent cookies.
528 - Thread safe.
529 """
530
531 # File to use to store all auth cookies.
maruel@chromium.orgbf2a02a2013-07-11 13:27:16 +0000532 COOKIE_FILE = os.path.join(os.path.expanduser('~'), '.isolated_cookies')
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000533
534 # CookieJar reused by all services + lock that protects its instantiation.
535 _cookie_jar = None
536 _cookie_jar_lock = threading.Lock()
537
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000538 def __init__(self, urlhost):
539 self.urlhost = urlhost
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000540 self.cookie_jar = self.load_cookie_jar()
541 self.opener = self.create_url_opener()
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000542
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000543 def authenticate(self): # pylint: disable=R0201
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000544 """Called when HTTP server asks client to authenticate.
545 Can be implemented in subclasses.
546 """
547 return False
548
549 @staticmethod
550 def load_cookie_jar():
551 """Returns global CoookieJar object that stores cookies in the file."""
552 with HttpService._cookie_jar_lock:
553 if HttpService._cookie_jar is not None:
554 return HttpService._cookie_jar
maruel@chromium.orgbf2a02a2013-07-11 13:27:16 +0000555 jar = ThreadSafeCookieJar(HttpService.COOKIE_FILE)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000556 jar.load()
557 HttpService._cookie_jar = jar
558 return jar
559
560 @staticmethod
561 def save_cookie_jar():
562 """Called when cookie jar needs to be flushed to disk."""
563 with HttpService._cookie_jar_lock:
564 if HttpService._cookie_jar is not None:
565 HttpService._cookie_jar.save()
566
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000567 def create_url_opener(self): # pylint: disable=R0201
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000568 """Returns OpenerDirector that will be used when sending requests.
569 Can be reimplemented in subclasses."""
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000570 return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000571
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000572 def request(self, urlpath, data=None, content_type=None, **kwargs):
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000573 """Attempts to open the given url multiple times.
574
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000575 |urlpath| is relative to the server root, i.e. '/some/request?param=1'.
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000576
577 |data| can be either:
578 -None for a GET request
579 -str for pre-encoded data
580 -list for data to be encoded
581 -dict for data to be encoded (COUNT_KEY will be added in this case)
582
583 Returns a file-like object, where the response may be read from, or None
584 if it was unable to connect.
585 """
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000586 assert urlpath and urlpath[0] == '/'
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000587
588 if isinstance(data, dict) and COUNT_KEY in data:
589 logging.error('%s already existed in the data passed into UlrOpen. It '
590 'would be overwritten. Aborting UrlOpen', COUNT_KEY)
591 return None
592
593 method = 'GET' if data is None else 'POST'
594 assert not ((method != 'POST') and content_type), (
595 'Can\'t use content_type on GET')
596
597 def make_request(extra):
598 """Returns a urllib2.Request instance for this specific retry."""
599 if isinstance(data, str) or data is None:
600 payload = data
601 else:
602 if isinstance(data, dict):
603 payload = data.items()
604 else:
605 payload = data[:]
606 payload.extend(extra.iteritems())
607 payload = urllib.urlencode(payload)
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000608 new_url = urlparse.urljoin(self.urlhost, urlpath[1:])
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000609 if isinstance(data, str) or data is None:
610 # In these cases, add the extra parameter to the query part of the url.
611 url_parts = list(urlparse.urlparse(new_url))
612 # Append the query parameter.
613 if url_parts[4] and extra:
614 url_parts[4] += '&'
615 url_parts[4] += urllib.urlencode(extra)
616 new_url = urlparse.urlunparse(url_parts)
617 request = urllib2.Request(new_url, data=payload)
618 if payload is not None:
619 if content_type:
620 request.add_header('Content-Type', content_type)
621 request.add_header('Content-Length', len(payload))
622 return request
623
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000624 return self._retry_loop(make_request, **kwargs)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000625
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000626 def _retry_loop(
627 self,
628 make_request,
629 max_attempts=URL_OPEN_MAX_ATTEMPTS,
630 retry_404=False,
631 retry_50x=True,
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000632 timeout=URL_OPEN_TIMEOUT,
633 read_timeout=None):
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000634 """Runs internal request-retry loop.
635
636 - Optionally retries HTTP 404 and 50x.
637 - Retries up to |max_attempts| times. If None or 0, there's no limit in the
638 number of retries.
639 - Retries up to |timeout| duration in seconds. If None or 0, there's no
640 limit in the time taken to do retries.
641 - If both |max_attempts| and |timeout| are None or 0, this functions retries
642 indefinitely.
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000643
644 If |read_timeout| is not None will configure underlying socket to
645 raise TimeoutError exception whenever there's no response from the server
646 for more than |read_timeout| seconds. It can happen during any read
647 operation so once you pass non-None |read_timeout| be prepared to handle
648 these exceptions in subsequent reads from the stream.
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000649 """
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000650 authenticated = False
651 last_error = None
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000652 attempt = 0
653 start = self._now()
654 for attempt in itertools.count():
655 if max_attempts and attempt >= max_attempts:
656 # Too many attempts.
657 break
658 if timeout and (self._now() - start) >= timeout:
659 # Retried for too long.
660 break
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000661 extra = {COUNT_KEY: attempt} if attempt else {}
662 request = make_request(extra)
663 try:
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000664 url_response = self._url_open(request, timeout=read_timeout)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000665 logging.debug('url_open(%s) succeeded', request.get_full_url())
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000666 return HttpResponse(url_response, request.get_full_url())
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000667 except urllib2.HTTPError as e:
668 # Unauthorized. Ask to authenticate and then try again.
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000669 if e.code in (401, 403):
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000670 # Try to authenticate only once. If it doesn't help, then server does
671 # not support app engine authentication.
vadimsh@chromium.orga1697342013-04-10 22:57:09 +0000672 logging.error(
vadimsh@chromium.orgdde2d732013-04-10 21:12:52 +0000673 'Authentication is required for %s on attempt %d.\n%s',
674 request.get_full_url(), attempt,
675 self._format_exception(e, verbose=True))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000676 if not authenticated and self.authenticate():
677 authenticated = True
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000678 # Do not sleep.
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000679 continue
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000680 # If authentication failed, return.
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000681 logging.error(
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000682 'Unable to authenticate to %s.\n%s',
683 request.get_full_url(), self._format_exception(e, verbose=True))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000684 return None
685
maruel@chromium.orgd58bf5b2013-04-26 17:57:42 +0000686 if ((e.code < 500 and not (retry_404 and e.code == 404)) or
687 (e.code >= 500 and not retry_50x)):
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000688 # This HTTPError means we reached the server and there was a problem
689 # with the request, so don't retry.
690 logging.error(
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000691 'Able to connect to %s but an exception was thrown.\n%s',
692 request.get_full_url(), self._format_exception(e, verbose=True))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000693 return None
694
695 # The HTTPError was due to a server error, so retry the attempt.
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000696 logging.warning('Able to connect to %s on attempt %d.\n%s',
697 request.get_full_url(), attempt,
698 self._format_exception(e))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000699 last_error = e
700
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000701 except (urllib2.URLError, httplib.HTTPException,
702 socket.timeout, ssl.SSLError) as e:
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000703 logging.warning('Unable to open url %s on attempt %d.\n%s',
704 request.get_full_url(), attempt,
705 self._format_exception(e))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000706 last_error = e
707
708 # Only sleep if we are going to try again.
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000709 if max_attempts and attempt != max_attempts:
710 remaining = None
711 if timeout:
712 remaining = timeout - (self._now() - start)
713 if remaining <= 0:
714 break
715 self.sleep_before_retry(attempt, remaining)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000716
717 logging.error('Unable to open given url, %s, after %d attempts.\n%s',
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000718 request.get_full_url(), max_attempts,
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000719 self._format_exception(last_error, verbose=True))
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000720 return None
721
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000722 def _url_open(self, request, timeout=None):
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000723 """Low level method to execute urllib2.Request's.
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000724
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000725 To be mocked in tests.
726 """
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000727 if timeout is not None:
728 return self.opener.open(request, timeout=timeout)
729 else:
730 # Leave original default value for |timeout|. It's nontrivial.
731 return self.opener.open(request)
maruel@chromium.orgef333122013-03-12 20:36:40 +0000732
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000733 @staticmethod
734 def _now():
735 """To be mocked in tests."""
736 return time.time()
737
738 @staticmethod
739 def calculate_sleep_before_retry(attempt, max_duration):
740 # Maximum sleeping time. We're hammering a cloud-distributed service, it'll
741 # survive.
742 MAX_SLEEP = 10.
743 # random.random() returns [0.0, 1.0). Starts with relatively short waiting
744 # time by starting with 1.5/2+1.5^-1 median offset.
745 duration = (random.random() * 1.5) + math.pow(1.5, (attempt - 1))
746 assert duration > 0.1
747 duration = min(MAX_SLEEP, duration)
748 if max_duration:
749 duration = min(max_duration, duration)
750 return duration
751
@classmethod
def sleep_before_retry(cls, attempt, max_duration):
  """Blocks for the delay computed by calculate_sleep_before_retry().

  Kept as a separate hook so tests can mock it.
  """
  delay = cls.calculate_sleep_before_retry(attempt, max_duration)
  time.sleep(delay)
maruel@chromium.orgef333122013-03-12 20:36:40 +0000759
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000760 @staticmethod
761 def _format_exception(exc, verbose=False):
762 """Given an instance of some exception raised by urlopen returns human
763 readable piece of text with detailed information about the error.
764 """
765 out = ['Exception: %s' % (exc,)]
766 if verbose:
767 if isinstance(exc, urllib2.HTTPError):
768 out.append('-' * 10)
769 if exc.hdrs:
770 for header, value in exc.hdrs.items():
771 if not header.startswith('x-'):
772 out.append('%s: %s' % (header.capitalize(), value))
773 out.append('')
774 out.append(exc.read() or '<empty body>')
775 out.append('-' * 10)
776 return '\n'.join(out)
777
maruel@chromium.orgef333122013-03-12 20:36:40 +0000778
class HttpResponse(object):
  """Wraps a response object returned by HttpService.

  Keeps track of the url and of the number of bytes read so far, so read
  timeouts can be reported with some context.
  """

  def __init__(self, url_response, url):
    self._url = url
    self._url_response = url_response
    # Number of bytes returned by read() so far.
    self._read = 0

  @property
  def content_length(self):
    """Value of the Content-Length header as int, None if it is absent."""
    raw = self._url_response.headers.get('Content-Length')
    if raw is None:
      return None
    return int(raw)

  def read(self, size=None):
    """Returns up to |size| bytes read from the underlying response.

    |size| is passed as is to the wrapped response; None means read all
    available bytes.

    Raises TimeoutError when the read fails with socket.timeout or
    ssl.SSLError.
    """
    try:
      chunk = self._url_response.read(size)
    except (socket.timeout, ssl.SSLError) as e:
      logging.error('Timeout while reading from %s, read %d of %s: %s',
                    self._url, self._read, self.content_length, e)
      raise TimeoutError(e)
    self._read += len(chunk)
    return chunk
808
809
class AppEngineService(HttpService):
  """HttpService that knows how to authenticate against an app engine based
  service.
  """

  # Shared by all instances so that the user is never confused by several
  # concurrent login prompts.
  _auth_lock = threading.Lock()

  def __init__(self, urlhost, email=None, password=None):
    super(AppEngineService, self).__init__(urlhost)
    self._keyring = None
    self.email = email
    self.password = password

  def authenticate(self):
    """Performs authentication in the app engine application.

    Returns True on success, False if it failed or if the 'upload' module is
    not available.
    """
    if not upload:
      logging.error('\'upload\' module is missing, '
                    'app engine authentication is disabled.')
      return False

    # Captured by the nested class below, whose own |self| is the RPC server.
    jar = self.cookie_jar
    persist_jar = self.save_cookie_jar

    class AuthServer(upload.AbstractRpcServer):
      """RPC server that stores its cookies in the service's cookie jar."""

      def _GetOpener(self):
        # Authentication code needs to know about 302 response, so build an
        # OpenerDirector that has no HTTPRedirectHandler.
        director = urllib2.OpenerDirector()
        handlers = (
            urllib2.ProxyHandler(),
            urllib2.UnknownHandler(),
            urllib2.HTTPHandler(),
            urllib2.HTTPDefaultErrorHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPErrorProcessor(),
            urllib2.HTTPCookieProcessor(jar),
        )
        for handler in handlers:
          director.add_handler(handler)
        return director

      def PerformAuthentication(self):
        self._Authenticate()
        persist_jar()
        return self.authenticated

    with AppEngineService._auth_lock:
      server = AuthServer(self.urlhost, self.get_credentials)
      return server.PerformAuthentication()

  def get_credentials(self):
    """Returns the (email, password) tuple to authenticate with.

    Called during the authentication process, possibly multiple times if
    authentication fails. Explicitly provided email and password win;
    otherwise the credentials come from upload.KeyringCreds.
    """
    # 'authenticate' calls this only if 'upload' is present. Make any other
    # caller fail non-cryptically if 'upload' is missing.
    assert upload, '\'upload\' module is required for this to work'
    have_explicit_credentials = self.email and self.password
    if have_explicit_credentials:
      return (self.email, self.password)
    if not self._keyring:
      self._keyring = upload.KeyringCreds(
          self.urlhost, self.urlhost, self.email)
    return self._keyring.GetUserCredentials()
872
873
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save.

  Both methods hold the jar's own lock while touching the file, expand a
  leading '~' in the file name and log file system errors instead of raising
  them.
  """

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists.

    If the file does not exist, an empty one is created. In both cases its
    mode is then set to 0600. Failures are logged, not raised.

    Arguments:
      filename: path of the cookie file, defaults to self.filename.
      ignore_discard, ignore_expires: passed to MozillaCookieJar.load().
    """
    filename = os.path.expanduser(filename or self.filename)
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(
              self, filename, ignore_discard, ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError) as e:
          # Best effort: an unreadable or malformed file only means that no
          # cookies are loaded from it.
          logging.warning('Failed to load cookies from %s: %s', filename, e)
      else:
        try:
          fd = os.open(filename, os.O_CREAT, 0o600)
          os.close(fd)
        except OSError:
          logging.error('Failed to create %s', filename)
      try:
        os.chmod(filename, 0o600)
      except OSError:
        logging.error('Failed to fix mode for %s', filename)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it.

    The file name is resolved the same way load() does it, so both methods
    always work on the same file. File system errors are logged, not raised.

    Arguments:
      filename: path of the cookie file, defaults to self.filename.
      ignore_discard, ignore_expires: passed to MozillaCookieJar.save().
    """
    filename = filename or self.filename
    if filename:
      filename = os.path.expanduser(filename)
    logging.debug('Saving cookies to %s', filename)
    with self._cookies_lock:
      try:
        cookielib.MozillaCookieJar.save(
            self, filename, ignore_discard, ignore_expires)
      except (IOError, OSError):
        # A failing open() raises IOError, which is not an OSError on
        # python 2, so both have to be listed.
        logging.error('Failed to save %s', filename)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000910
911
class ThreadPoolError(Exception):
  """Root of the exception hierarchy raised by ThreadPool."""
  pass
914
915
class ThreadPoolEmpty(ThreadPoolError):
  """A task result was requested from a pool that has no pending tasks."""
  pass
918
919
class ThreadPoolClosed(ThreadPoolError):
  """An operation was attempted on a thread pool that is already closed."""
  pass
922
923
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000924class ThreadPool(object):
925 """Implements a multithreaded worker pool oriented for mapping jobs with
926 thread-local result storage.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000927
928 Arguments:
929 - initial_threads: Number of threads to start immediately. Can be 0 if it is
930 uncertain that threads will be needed.
931 - max_threads: Maximum number of threads that will be started when all the
932 threads are busy working. Often the number of CPU cores.
933 - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
934 queue. A non-zero value may make add_task() blocking.
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000935 - prefix: Prefix to use for thread names. Pool's threads will be
936 named '<prefix>-<thread index>'.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000937 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000938 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000939
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000940 def __init__(self, initial_threads, max_threads, queue_size, prefix=None):
941 prefix = prefix or 'tp-0x%0x' % id(self)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000942 logging.debug(
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000943 'New ThreadPool(%d, %d, %d): %s', initial_threads, max_threads,
944 queue_size, prefix)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000945 assert initial_threads <= max_threads
946 # Update this check once 256 cores CPU are common.
947 assert max_threads <= 256
948
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000949 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000950 self._max_threads = max_threads
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000951 self._prefix = prefix
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000952
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +0000953 # Used to assign indexes to tasks.
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000954 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000955 self._num_of_added_tasks = 0
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +0000956
957 # Lock that protected everything below (including conditional variable).
958 self._lock = threading.Lock()
959
960 # Condition 'bool(_outputs) or bool(_exceptions) or _pending_count == 0'.
961 self._outputs_exceptions_cond = threading.Condition(self._lock)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000962 self._outputs = []
963 self._exceptions = []
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000964
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +0000965 # Number of pending tasks (queued or being processed now).
966 self._pending_count = 0
967
968 # List of threads.
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000969 self._workers = []
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +0000970 # Number of threads that are waiting for new tasks.
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000971 self._ready = 0
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000972 # Number of threads already added to _workers, but not yet running the loop.
973 self._starting = 0
974 # True if close was called. Forbids adding new tasks.
975 self._is_closed = False
976
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000977 for _ in range(initial_threads):
978 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000979
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000980 def _add_worker(self):
981 """Adds one worker thread if there isn't too many. Thread-safe."""
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +0000982 with self._lock:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000983 if len(self._workers) >= self._max_threads or self._is_closed:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000984 return False
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000985 worker = threading.Thread(
986 name='%s-%d' % (self._prefix, len(self._workers)), target=self._run)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000987 self._workers.append(worker)
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000988 self._starting += 1
989 logging.debug('Starting worker thread %s', worker.name)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000990 worker.daemon = True
991 worker.start()
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000992 return True
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000993
maruel@chromium.org831958f2013-01-22 15:01:46 +0000994 def add_task(self, priority, func, *args, **kwargs):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000995 """Adds a task, a function to be executed by a worker.
996
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000997 |priority| can adjust the priority of the task versus others. Lower priority
maruel@chromium.org831958f2013-01-22 15:01:46 +0000998 takes precedence.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000999
maruel@chromium.orgedd25d02013-03-26 14:38:00 +00001000 |func| can either return a return value to be added to the output list or
1001 be a generator which can emit multiple values.
1002
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001003 Returns the index of the item added, e.g. the total number of enqueued items
1004 up to now.
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001005 """
maruel@chromium.org831958f2013-01-22 15:01:46 +00001006 assert isinstance(priority, int)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001007 assert callable(func)
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001008 with self._lock:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001009 if self._is_closed:
1010 raise ThreadPoolClosed('Can not add a task to a closed ThreadPool')
1011 start_new_worker = (
1012 # Pending task count plus new task > number of available workers.
1013 self.tasks.qsize() + 1 > self._ready + self._starting and
1014 # Enough slots.
1015 len(self._workers) < self._max_threads
1016 )
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001017 self._pending_count += 1
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001018 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001019 self._num_of_added_tasks += 1
1020 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001021 self.tasks.put((priority, index, func, args, kwargs))
1022 if start_new_worker:
1023 self._add_worker()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001024 return index
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001025
1026 def _run(self):
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001027 """Worker thread loop. Runs until a None task is queued."""
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001028 # Thread has started, adjust counters.
1029 with self._lock:
1030 self._starting -= 1
1031 self._ready += 1
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001032 while True:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001033 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001034 task = self.tasks.get()
1035 finally:
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001036 with self._lock:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001037 self._ready -= 1
1038 try:
1039 if task is None:
1040 # We're done.
1041 return
1042 _priority, _index, func, args, kwargs = task
maruel@chromium.orgedd25d02013-03-26 14:38:00 +00001043 if inspect.isgeneratorfunction(func):
1044 for out in func(*args, **kwargs):
1045 self._output_append(out)
1046 else:
1047 out = func(*args, **kwargs)
1048 self._output_append(out)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001049 except Exception as e:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001050 logging.warning('Caught exception: %s', e)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001051 exc_info = sys.exc_info()
maruel@chromium.org97cd0be2013-03-13 14:01:36 +00001052 logging.info(''.join(traceback.format_tb(exc_info[2])))
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001053 with self._outputs_exceptions_cond:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001054 self._exceptions.append(exc_info)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001055 self._outputs_exceptions_cond.notifyAll()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001056 finally:
csharp@chromium.org60991182013-03-18 13:44:17 +00001057 try:
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001058 # Mark thread as ready again, mark task as processed. Do it before
1059 # waking up threads waiting on self.tasks.join(). Otherwise they might
1060 # find ThreadPool still 'busy' and perform unnecessary wait on CV.
1061 with self._outputs_exceptions_cond:
1062 self._ready += 1
1063 self._pending_count -= 1
1064 if self._pending_count == 0:
1065 self._outputs_exceptions_cond.notifyAll()
csharp@chromium.org60991182013-03-18 13:44:17 +00001066 self.tasks.task_done()
1067 except Exception as e:
1068 # We need to catch and log this error here because this is the root
1069 # function for the thread, nothing higher will catch the error.
1070 logging.exception('Caught exception while marking task as done: %s',
1071 e)
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001072
maruel@chromium.orgedd25d02013-03-26 14:38:00 +00001073 def _output_append(self, out):
1074 if out is not None:
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001075 with self._outputs_exceptions_cond:
maruel@chromium.orgedd25d02013-03-26 14:38:00 +00001076 self._outputs.append(out)
1077 self._outputs_exceptions_cond.notifyAll()
maruel@chromium.orgedd25d02013-03-26 14:38:00 +00001078
maruel@chromium.orgeb281652012-11-08 21:10:23 +00001079 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001080 """Extracts all the results from each threads unordered.
1081
1082 Call repeatedly to extract all the exceptions if desired.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001083
1084 Note: will wait for all work items to be done before returning an exception.
1085 To get an exception early, use get_one_result().
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001086 """
1087 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +00001088 self.tasks.join()
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001089 with self._outputs_exceptions_cond:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001090 if self._exceptions:
1091 e = self._exceptions.pop(0)
1092 raise e[0], e[1], e[2]
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001093 out = self._outputs
1094 self._outputs = []
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001095 return out
1096
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001097 def get_one_result(self):
1098 """Returns the next item that was generated or raises an exception if one
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001099 occurred.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001100
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001101 Raises:
1102 ThreadPoolEmpty - no results available.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001103 """
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001104 # Get first available result.
1105 for result in self.iter_results():
1106 return result
1107 # No results -> tasks queue is empty.
1108 raise ThreadPoolEmpty('Task queue is empty')
1109
1110 def iter_results(self):
1111 """Yields results as they appear until all tasks are processed."""
1112 while True:
1113 # Check for pending results.
1114 result = None
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001115 with self._outputs_exceptions_cond:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001116 if self._exceptions:
1117 e = self._exceptions.pop(0)
1118 raise e[0], e[1], e[2]
1119 if self._outputs:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001120 # Remember the result to yield it outside of the lock.
1121 result = self._outputs.pop(0)
1122 else:
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001123 # No pending tasks -> all tasks are done.
1124 if not self._pending_count:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001125 return
1126 # Some task is queued, wait for its result to appear.
1127 # Use non-None timeout so that process reacts to Ctrl+C and other
1128 # signals, see http://bugs.python.org/issue8844.
1129 self._outputs_exceptions_cond.wait(timeout=5)
1130 continue
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001131 yield result
1132
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001133 def close(self):
1134 """Closes all the threads."""
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001135 # Ensure no new threads can be started, self._workers is effectively
1136 # a constant after that and can be accessed outside the lock.
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001137 with self._lock:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001138 if self._is_closed:
1139 raise ThreadPoolClosed('Can not close already closed ThreadPool')
1140 self._is_closed = True
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001141 for _ in range(len(self._workers)):
1142 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +00001143 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001144 for t in self._workers:
1145 t.join()
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001146 logging.debug(
1147 'Thread pool \'%s\' closed: spawned %d threads total',
1148 self._prefix, len(self._workers))
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001149
1150 def __enter__(self):
1151 """Enables 'with' statement."""
1152 return self
1153
maruel@chromium.org97cd0be2013-03-13 14:01:36 +00001154 def __exit__(self, _exc_type, _exc_value, _traceback):
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001155 """Enables 'with' statement."""
1156 self.close()
1157
1158
def valid_file(filepath, size):
  """Returns True if the file at |filepath| looks valid.

  Currently only the size is compared with |size|; when |size| is
  UNKNOWN_FILE_SIZE the file is not inspected at all and is accepted.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  actual_size = os.stat(filepath).st_size
  if size == actual_size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), actual_size, size)
  return False
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001171
1172
class Profiler(object):
  """Context manager that logs how long the enclosed section took to run."""

  def __init__(self, name):
    # Set by __enter__.
    self.start_time = None
    self.name = name

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
1186
1187
class DeadlockDetector(object):
  """Context manager that can detect deadlocks.

  It will dump stack frames of all running threads if its 'ping' method isn't
  called in time.

  Usage:
    with DeadlockDetector(timeout=60) as detector:
      for item in some_work():
        ...
        detector.ping()
        ...

  Arguments:
    timeout - maximum allowed time between calls to 'ping'.
  """

  def __init__(self, timeout):
    self.timeout = timeout
    self._thread = None
    # Thread stop condition. Also lock for shared variables below.
    self._stop_cv = threading.Condition()
    self._stop_flag = False
    # Time when 'ping' was called last time.
    self._last_ping = None
    # True if pings are coming on time.
    self._alive = True

  def __enter__(self):
    """Starts internal watcher thread."""
    assert self._thread is None
    # Initializes _last_ping, so the first deadline counts from now.
    self.ping()
    self._thread = threading.Thread(name='deadlock-detector', target=self._run)
    self._thread.daemon = True
    self._thread.start()
    return self

  def __exit__(self, *_args):
    """Stops internal watcher thread."""
    assert self._thread is not None
    with self._stop_cv:
      self._stop_flag = True
      self._stop_cv.notify()
    self._thread.join()
    self._thread = None
    # Reset the flag so the detector can be entered again.
    self._stop_flag = False

  def ping(self):
    """Notify detector that main thread is still running.

    Should be called periodically to inform the detector that everything is
    running as it should.
    """
    with self._stop_cv:
      self._last_ping = time.time()
      self._alive = True

  def _run(self):
    """Loop that watches for pings and dumps threads state if ping is late."""
    with self._stop_cv:
      while not self._stop_flag:
        # Skipped deadline? Dump threads and switch to 'not alive' state.
        if self._alive and time.time() > self._last_ping + self.timeout:
          self.dump_threads(time.time() - self._last_ping, True)
          self._alive = False

        # Pings are on time?
        if self._alive:
          # Wait until the moment we need to dump stack traces.
          # Most probably some other thread will call 'ping' to move deadline
          # further in time. We don't bother to wake up after each 'ping',
          # only right before initial expected deadline.
          self._stop_cv.wait(self._last_ping + self.timeout - time.time())
        else:
          # Skipped some pings previously. Just periodically silently check
          # for new pings with some arbitrary frequency.
          self._stop_cv.wait(self.timeout * 0.1)

  @staticmethod
  def dump_threads(timeout=None, skip_current_thread=False):
    """Dumps stack frames of all running threads.

    Arguments:
      timeout: if not None, number of seconds without pings to report in the
          dump header.
      skip_current_thread: if True, the calling thread is left out of the
          dump. The watcher thread uses it to leave itself out.
    """
    current_thread_id = threading.current_thread().ident
    # Thread id -> symbolic thread name, to get more informative dump.
    thread_names = dict((t.ident, t.name) for t in threading.enumerate())

    # Collect tracebacks: thread name -> traceback string.
    tracebacks = {}

    # pylint: disable=W0212
    for thread_id, frame in sys._current_frames().items():
      # Don't dump deadlock detector's own thread, it's boring.
      if skip_current_thread and thread_id == current_thread_id:
        continue
      name = '%s #%d' % (thread_names.get(thread_id, 'untitled'), thread_id)
      tracebacks[name] = ''.join(traceback.format_stack(frame))

    # Function to print a message. Makes it easier to change output destination.
    def output(msg):
      logging.warning(msg.rstrip())

    # Print tracebacks, sorting them by thread name. That way a thread pool's
    # threads will be printed as one group.
    output('=============== Potential deadlock detected ===============')
    if timeout is not None:
      output('No pings in last %d sec.' % (timeout,))
    output('Dumping stack frames for all threads:')
    for name in sorted(tracebacks):
      output('Traceback for \'%s\':\n%s' % (name, tracebacks[name]))
    output('===========================================================')
vadimsh@chromium.org5db0f4f2013-07-04 13:57:02 +00001303
1304
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001305class Remote(object):
maruel@chromium.orgfb155e92012-09-28 20:36:54 +00001306 """Priority based worker queue to fetch or upload files from a
1307 content-address server. Any function may be given as the fetcher/upload,
1308 as long as it takes two inputs (the item contents, and their relative
1309 destination).
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001310
1311 Supports local file system, CIFS or http remotes.
1312
1313 When the priority of items is equals, works in strict FIFO mode.
1314 """
1315 # Initial and maximum number of worker threads.
1316 INITIAL_WORKERS = 2
1317 MAX_WORKERS = 16
1318 # Priorities.
1319 LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
1320 INTERNAL_PRIORITY_BITS = (1<<8) - 1
1321 RETRIES = 5
1322
def __init__(self, destination_root):
  """Sets up the item handler for |destination_root| and the worker pool."""
  # Function that fetches a remote object or uploads to a remote location.
  self._do_item = self.get_file_handler(destination_root)
  # Contains tuple(priority, obj).
  self._done = Queue.PriorityQueue()
  # Worker pool named 'remote' with an unbounded task queue.
  initial, maximum = self.INITIAL_WORKERS, self.MAX_WORKERS
  self._pool = ThreadPool(initial, maximum, 0, 'remote')
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001329
def join(self):
  """Waits for the worker pool to drain and returns what its join() returns."""
  results = self._pool.join()
  return results
maruel@chromium.orgfb155e92012-09-28 20:36:54 +00001333
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001334 def close(self):
1335 """Terminates all worker threads."""
1336 self._pool.close()
1337
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +00001338 def add_item(self, priority, obj, dest, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001339 """Retrieves an object from the remote data store.
1340
1341 The smaller |priority| gets fetched first.
1342
1343 Thread-safe.
1344 """
1345 assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001346 return self._add_item(priority, obj, dest, size)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001347
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001348 def _add_item(self, priority, obj, dest, size):
1349 assert isinstance(obj, basestring), obj
1350 assert isinstance(dest, basestring), dest
1351 assert size is None or isinstance(size, int), size
1352 return self._pool.add_task(
1353 priority, self._task_executer, priority, obj, dest, size)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001354
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001355 def get_one_result(self):
1356 return self._pool.get_one_result()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001357
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001358 def _task_executer(self, priority, obj, dest, size):
1359 """Wraps self._do_item to trap and retry on IOError exceptions."""
1360 try:
1361 self._do_item(obj, dest)
1362 if size and not valid_file(dest, size):
1363 download_size = os.stat(dest).st_size
1364 os.remove(dest)
1365 raise IOError('File incorrect size after download of %s. Got %s and '
1366 'expected %s' % (obj, download_size, size))
1367 # TODO(maruel): Technically, we'd want to have an output queue to be a
1368 # PriorityQueue.
1369 return obj
1370 except IOError as e:
1371 logging.debug('Caught IOError: %s', e)
vadimsh@chromium.org80f73002013-07-12 14:52:44 +00001372 # Remove unfinished download.
1373 if os.path.exists(dest):
1374 os.remove(dest)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001375 # Retry a few times, lowering the priority.
1376 if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
1377 self._add_item(priority + 1, obj, dest, size)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001378 return
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001379 raise
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001380
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +00001381 def get_file_handler(self, file_or_url): # pylint: disable=R0201
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001382 """Returns a object to retrieve objects from a remote."""
1383 if re.match(r'^https?://.+$', file_or_url):
vadimsh@chromium.org80f73002013-07-12 14:52:44 +00001384 return functools.partial(self._download_file, file_or_url)
1385 else:
1386 return functools.partial(self._copy_file, file_or_url)
csharp@chromium.orge9c8d942013-03-11 20:48:36 +00001387
vadimsh@chromium.org80f73002013-07-12 14:52:44 +00001388 @staticmethod
1389 def _download_file(base_url, item, dest):
1390 # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
1391 # easy.
1392 try:
1393 zipped_source = base_url + item
1394 logging.debug('download_file(%s)', zipped_source)
csharp@chromium.orgec477752013-05-24 20:48:48 +00001395
vadimsh@chromium.org80f73002013-07-12 14:52:44 +00001396 # Because the app engine DB is only eventually consistent, retry
1397 # 404 errors because the file might just not be visible yet (even
1398 # though it has been uploaded).
1399 connection = url_open(zipped_source, retry_404=True,
1400 read_timeout=DOWNLOAD_READ_TIMEOUT)
1401 if not connection:
1402 raise IOError('Unable to open connection to %s' % zipped_source)
csharp@chromium.orgec477752013-05-24 20:48:48 +00001403
vadimsh@chromium.org80f73002013-07-12 14:52:44 +00001404 content_length = connection.content_length
1405 decompressor = zlib.decompressobj()
1406 size = 0
1407 with open(dest, 'wb') as f:
1408 while True:
1409 chunk = connection.read(ZIPPED_FILE_CHUNK)
1410 if not chunk:
1411 break
1412 size += len(chunk)
1413 f.write(decompressor.decompress(chunk))
1414 # Ensure that all the data was properly decompressed.
1415 uncompressed_data = decompressor.flush()
1416 assert not uncompressed_data
1417 except IOError as e:
1418 logging.error('Failed to download %s at %s.\n%s', item, dest, e)
1419 raise
1420 except httplib.HTTPException as e:
1421 msg = 'HTTPException while retrieving %s at %s.\n%s' % (item, dest, e)
1422 logging.error(msg)
1423 raise IOError(msg)
1424 except zlib.error as e:
1425 msg = 'Corrupted zlib for item %s. Processed %d of %s bytes.\n%s' % (
1426 item, size, content_length, e)
1427 logging.error(msg)
csharp@chromium.orge3413b42013-05-24 17:56:56 +00001428
vadimsh@chromium.org80f73002013-07-12 14:52:44 +00001429 # Testing seems to show that if a few machines are trying to download
1430 # the same blob, they can cause each other to fail. So if we hit a
1431 # zip error, this is the most likely cause (it only downloads some of
1432 # the data). Randomly sleep for between 5 and 25 seconds to try and
1433 # spread out the downloads.
1434 # TODO(csharp): Switch from blobstorage to cloud storage and see if
1435 # that solves the issue.
1436 sleep_duration = (random.random() * 20) + 5
1437 time.sleep(sleep_duration)
csharp@chromium.orga92403f2012-11-20 15:13:59 +00001438
vadimsh@chromium.org80f73002013-07-12 14:52:44 +00001439 raise IOError(msg)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001440
vadimsh@chromium.org80f73002013-07-12 14:52:44 +00001441 @staticmethod
1442 def _copy_file(base_path, item, dest):
1443 source = os.path.join(base_path, item)
1444 if source == dest:
1445 logging.info('Source and destination are the same, no action required')
1446 return
1447 logging.debug('copy_file(%s, %s)', source, dest)
1448 shutil.copy(source, dest)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001449
1450
class CachePolicies(object):
  """Plain holder for the limits that decide when the cache gets trimmed."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Upper bound on the total size of the cache; trimming
                      starts once it is exceeded. 0 disables the limit, so
                      the cache is effectively a leak.
    - min_free_space: Lower bound on free disk space; trimming starts once
                      free space drops below it. 0 disables the check, so the
                      disk may be filled unconditionally.
    - max_items: Upper bound on the number of cached items. 0 means no limit
                 is enforced.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
1465
1466
class NoCache(object):
  """Drop-in stand-in for the Cache class that keeps no cache at all.

  Files are downloaded straight into the target directory; callers then move
  them to where they are needed.
  """

  def __init__(self, target_directory, remote):
    self.target_directory = target_directory
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Queues |item| on the remote and waits for one result to come back."""
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Downloads the first key of |items| if it is missing, then returns it."""
    item = next(iter(items))
    destination = self.path(item)

    if not os.path.exists(destination):
      self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
      downloaded = self.remote.get_one_result()
      assert downloaded == item

    return item

  def path(self, item):
    """Returns the path of |item| inside the target directory."""
    return os.path.join(self.target_directory, item)
1495
1496
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.

  The state is an ordered list of (filename, size) entries, oldest first. It
  is loaded and reconciled with the content of the cache directory at
  construction, and written back by save() whenever it was modified.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    self._state_need_to_be_saved = False
    # A lookup map (filename -> index in self.state) to speed up searching.
    self._lookup = {}
    self._lookup_is_stale = True

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    with Profiler('Setup'):
      if not os.path.isdir(self.cache_dir):
        os.makedirs(self.cache_dir)
      if os.path.isfile(self.state_file):
        try:
          # Use a context manager so the handle is closed deterministically.
          with open(self.state_file, 'r') as f:
            self.state = json.load(f)
        except (IOError, ValueError) as e:
          # Too bad. The file will be overwritten and the cache cleared.
          logging.error(
              'Broken state file %s, ignoring.\n%s', self.STATE_FILE, e)
          self._state_need_to_be_saved = True
        if (not isinstance(self.state, list) or
            not all(
              isinstance(i, (list, tuple)) and len(i) == 2
              for i in self.state)):
          # Discard.
          self._state_need_to_be_saved = True
          self.state = []

      # Ensure that all files listed in the state still exist and add new ones.
      previous = set(filename for filename, _ in self.state)
      if len(previous) != len(self.state):
        logging.warning('Cache state is corrupted, found duplicate files')
        self._state_need_to_be_saved = True
        self.state = []
        # The state was dropped, so nothing is known anymore. Clearing
        # |previous| lets the files on disk be re-added below as untracked
        # files instead of being skipped and left untracked.
        previous = set()

      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        if not RE_IS_SHA1.match(filename):
          logging.warning('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
          continue
        # Insert as the oldest file. It will be deleted eventually if not
        # accessed.
        self._add(filename, False)
        logging.warning('Add unknown file %s to cache', filename)
        added += 1

      if added:
        logging.warning('Added back %d unknown files', added)
      if previous:
        # What is left in |previous| was listed in the state but is not on
        # disk anymore.
        logging.warning('Removed %d lost files', len(previous))
        # Set explicitly in case self._add() wasn't called.
        self._state_need_to_be_saved = True
        # Filter out entries that were not found while keeping the previous
        # order.
        self.state = [
          (filename, size) for filename, size in self.state
          if filename not in previous
        ]
      self.trim()

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Trims the cache one last time and logs usage statistics."""
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%5d (%8dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('       %8dkb free', self._free_disk / 1024)

  def remove_file_at_index(self, index):
    """Removes the file at the given index.

    The entry is dropped from the state before the file is deleted, so a
    failure to delete the file (OSError) is only logged and the entry stays
    removed.
    """
    try:
      self._state_need_to_be_saved = True
      filename, size = self.state.pop(index)
      # If the lookup was already stale, its possible the filename was not
      # present yet.
      self._lookup_is_stale = True
      self._lookup.pop(filename, None)
      self._removed.append(size)
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s', e)

  def remove_lru_file(self):
    """Removes the least recently used file."""
    self.remove_file_at_index(0)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists.

    Applies, in order, the maximum cache size, the maximum number of items and
    the minimum free disk space policies, evicting the oldest entries first,
    then saves the state.
    """
    # Ensure maximum cache size.
    max_cache_size = self.policies.max_cache_size
    if max_cache_size and self.state:
      # Keep a running total instead of summing the whole state again after
      # each eviction.
      total = sum(size for _, size in self.state)
      while self.state and total > max_cache_size:
        total -= self.state[0][1]
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    trimmed_due_to_space = False
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      trimmed_due_to_space = True
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)
    if trimmed_due_to_space:
      total = sum(size for _, size in self.state)
      # The cache may be empty at this point and max_cache_size may be 0
      # (no limit); neither case may cause a division by zero.
      usage_percent = 0.
      if max_cache_size:
        usage_percent = 100. * total / max_cache_size
      logging.warning(
          'Trimmed due to not enough free disk space: %.1fkb free, %.1fkb '
          'cache (%.1f%% of its maximum capacity)',
          self._free_disk / 1024.,
          total / 1024.,
          usage_percent,
          )
    self.save()

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verifiy that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.

    The fetch is only queued here; the item is added to the state once
    wait_for() receives it.
    """
    assert '/' not in item
    path = self.path(item)
    self._update_lookup()
    index = self._lookup.get(item)

    if index is not None:
      if not valid_file(path, size):
        self.remove_file_at_index(index)
        index = None
      else:
        assert index < len(self.state)
        # Was already in cache. Update it's LRU value by putting it at the end.
        self._state_need_to_be_saved = True
        self._lookup_is_stale = True
        self.state.append(self.state.pop(index))

    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      # TODO(maruel): It should look at the free disk space, the current cache
      # size and the size of the new item on every new item:
      # - Trim the cache as more entries are listed when free disk space is low,
      #   otherwise if the amount of data downloaded during the run > free disk
      #   space, it'll crash.
      # - Make sure there's enough free disk space to fit all dependencies of
      #   this run! If not, abort early.
      self.remote.add_item(priority, item, path, size)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache.

    Does nothing if |obj| is already tracked.
    """
    self._update_lookup()
    if obj not in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK_WITH_FALLBACK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering.

    Only writes the state file when the state changed since the last save.
    """
    if self._state_need_to_be_saved:
      # Close the file explicitly so the content is flushed to disk before
      # the state is flagged as saved.
      with open(self.state_file, 'wb') as f:
        json.dump(self.state, f, separators=(',',':'))
      self._state_need_to_be_saved = False

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    self._update_lookup()
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_one_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    The size is read from the file on disk. The entry is appended as the most
    recent one when |at_end| is True, inserted as the oldest one otherwise.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    self._state_need_to_be_saved = True
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self._lookup_is_stale = True
      self.state.insert(0, (item, size))

  def _update_lookup(self):
    """Rebuilds the filename -> index map if it was flagged as stale."""
    if self._lookup_is_stale:
      self._lookup = dict(
          (filename, index) for index, (filename, _) in enumerate(self.state))
      self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001757
1758
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)', obj_hash)
    self.obj_hash = obj_hash

    # Raw data, as returned by load_isolated().
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False
    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| as a .isolated file and stores the result.

    Creates one (not yet loaded) IsolatedFile child per entry of the
    'includes' key. Must be called at most once per instance.
    """
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    includes = self.data.get('includes', [])
    self.children = [IsolatedFile(include) for include in includes]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Registers into |files| the files of this .isolated not already there.

    Each newly registered file that carries a hash ('h') is preemptively
    requested from |cache|. Does nothing when this file is not parsed yet or
    was already processed.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)', self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s', filepath)
        cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1813
1814
class Settings(object):
  """Results of a completely parsed .isolated file."""

  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Raises ConfigError if the same .isolated hash is included more than once.
    """
    self.root = IsolatedFile(root_isolated_hash)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

    retrieve(self.root)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file as soon as it is read instead of leaking the handle.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(cache, self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False

  def _traverse_tree(self, cache, node):
    """Walks the tree under |node|, processing every node that can be fetched.

    Among the children of a fetchable node, only the first one not yet marked
    is newly marked as fetchable on each pass; the loop stops at the next
    unmarked child.
    """
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(cache, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(cache, i)

  def _update_self(self, cache, node):
    """Requests the files of |node| and merges its properties into self.

    A property already set from a previously processed node is never
    overwritten.
    """
    node.fetch_files(cache, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      self.command = node.data['command']
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1906
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001907
def create_directories(base_directory, files):
  """Creates under |base_directory| every directory the given files need.

  |files| is an iterable of relative file paths. The parent directory of each
  path and all of its ancestors are created, parents first.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect each parent directory along with all of its ancestors.
  needed = set()
  for filepath in files:
    parent = os.path.dirname(filepath)
    while parent:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # Sorting guarantees a directory is created before its sub-directories.
  for relative_dir in sorted(needed):
    os.mkdir(os.path.join(base_directory, relative_dir))
1920
1921
def create_links(base_directory, files):
  """Creates the symlinks described by the given (filepath, properties) pairs.

  Only entries with a 'l' (link target) property are handled. On Windows they
  are skipped with a warning. When a 'm' (mode) property is present and the
  platform provides os.lchmod, the mode is applied to the link itself.
  """
  on_windows = sys.platform == 'win32'
  for filepath, properties in files:
    if 'l' not in properties:
      continue
    if on_windows:
      # TODO(maruel): Create junctions or empty text files similar to what
      # cygwin do?
      logging.warning('Ignoring symlink %s', filepath)
      continue
    outfile = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'link' property should
    # never be specified for windows .isolated file.
    os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    if 'm' not in properties:
      continue
    # os.lchmod is not available on every platform.
    lchmod = getattr(os, 'lchmod', None)
    if lchmod:
      lchmod(outfile, properties['m'])
1940
1941
def setup_commands(base_directory, cwd, cmd):
  """Returns the (working directory, command) pair needed to run the test.

  |cwd| must be relative; it is resolved against |base_directory| and created
  if missing. The first element of |cmd| is rewritten in place to use the
  native path separator before the command goes through fix_python_path().
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  working_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(working_dir):
    os.makedirs(working_dir)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0]
  cmd[0] = executable.replace('/', os.path.sep)

  return working_dir, fix_python_path(cmd)
1956
1957
def generate_remaining_files(files):
  """Groups the files still to be downloaded by content hash.

  |files| is an iterable of (filepath, properties) pairs. Returns a dict
  mapping each 'h' property value to the list of (filepath, properties) pairs
  that carry it; pairs without a 'h' property are left out.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))
  return by_hash
1966
1967
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies to the given directory.

  Loads the settings identified by |isolated_hash|, then recreates the
  directories, symlinks and files they list inside |target_directory|. The
  test is not run; its command line and working directory are only printed.

  Arguments:
    isolated_hash: identifier handed to Settings.load().
    target_directory: destination directory; created when it does not exist.
    remote: value handed to Remote() to build the download source.

  Returns:
    1 when the loaded settings contain no command, 0 otherwise.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds') as _prof:
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    print >> sys.stderr, 'No command to run'
    return 1

  with Profiler('GetRest') as _prof:
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    # A copy of the command is passed since setup_commands() edits its first
    # item in place.
    cwd, cmd = setup_commands(target_directory, settings.relative_cwd,
                              settings.command[:])

    # Maps each 'h' value to every (filepath, properties) pair that uses it.
    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      obj = no_cache.wait_for(remaining)
      files = remaining.pop(obj)

      for i, (filepath, properties) in enumerate(files):
        outfile = os.path.join(target_directory, filepath)
        logging.info(no_cache.path(obj))

        # The same content may be needed at several destinations: every
        # destination but the last one gets a copy, the last one takes over
        # the fetched file itself.
        if i + 1 == len(files):
          os.rename(no_cache.path(obj), outfile)
        else:
          shutil.copyfile(no_cache.path(obj), outfile)

        if 'm' in properties and not sys.platform == 'win32':
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in that
          # case.
          os.chmod(outfile, properties['m'])

      # Report progress at most once per DELAY_BETWEEN_UPDATES_IN_SECS.
      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        msg = '%d files remaining...' % len(remaining)
        print msg
        logging.info(msg)
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
2027
2028
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
    isolated_hash: either a value matching RE_IS_SHA1, or the path of a local
        file. A local file is hashed with SHA-1 and added to the cache first.
    cache_dir: directory handed to Cache(); the temporary directory is created
        through make_temp_dir() with this same value.
    remote: value handed to Remote() to build the download source.
    policies: cache policies handed to Cache().

  Returns:
    1 when the loaded settings contain no command, otherwise the exit code of
    the command.

  Raises:
    OSError: the command could not be started. A message is printed to stderr
        before the exception is re-raised.

  The temporary directory is removed in every case.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # The file is closed explicitly instead of relying on the garbage
          # collector to release the handle.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        # A copy of the command is passed since setup_commands() edits its
        # first item in place.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        with DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
          while remaining:
            detector.ping()
            obj = cache.wait_for(remaining)
            # The same content may be needed at several destinations.
            for filepath, properties in remaining.pop(obj):
              outfile = os.path.join(outdir, filepath)
              link_file(outfile, cache.path(obj), HARDLINK)
              if 'm' in properties:
                # It's not set on Windows.
                os.chmod(outfile, properties['m'])

            # Report progress at most once per DELAY_BETWEEN_UPDATES_IN_SECS.
            if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
              msg = '%d files remaining...' % len(remaining)
              print(msg)
              logging.info(msg)
              last_update = time.time()

      if settings.read_only:
        logging.info('Making files read only')
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s', cmd, cwd)

      # TODO(csharp): This should be specified somewhere else.
      # TODO(vadimsh): Pass it via 'env_vars' in manifest.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE',
                     os.path.join(MAIN_DIR, RUN_TEST_CASES_LOG))
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      rmtree(outdir)
2100
2101
class OptionParserWithLogging(optparse.OptionParser):
  """optparse.OptionParser that also configures the logging module.

  Registers -v/--verbose and -l/--log_file, and sets up the root logger from
  their values each time parse_args() is called.
  """
  def __init__(self, verbose=0, log_file=None, **kwargs):
    """Creates the parser.

    Arguments:
      verbose: default value of --verbose.
      log_file: default value of --log_file.
      **kwargs: forwarded to optparse.OptionParser. 'description' defaults to
          the docstring of the __main__ module.
    """
    main_doc = sys.modules['__main__'].__doc__
    kwargs.setdefault('description', main_doc)
    optparse.OptionParser.__init__(self, **kwargs)
    self.add_option(
        '-v', '--verbose',
        action='count',
        default=verbose,
        help='Use multiple times to increase verbosity')
    self.add_option(
        '-l', '--log_file',
        default=log_file,
        help='The name of the file to store rotating log details.')

  def parse_args(self, *args, **kwargs):
    """Parses the command line, then attaches the logging handlers.

    A console handler is always added to the root logger; its level is ERROR,
    INFO or DEBUG for 0, 1 or 2 and more --verbose. When --log_file is set, a
    rotating file handler at DEBUG level is added too.

    Returns:
      The (options, args) tuple returned by optparse.OptionParser.parse_args().
    """
    options, leftover = optparse.OptionParser.parse_args(
        self, *args, **kwargs)

    # Indexed by the verbosity count; anything past the end maps to DEBUG.
    console_levels = (logging.ERROR, logging.INFO, logging.DEBUG)
    console_level = console_levels[
        min(options.verbose, len(console_levels) - 1)]

    root = logging.getLogger()

    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter(
        '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
    console.setLevel(console_level)
    root.setLevel(console_level)
    root.addHandler(console)

    if options.log_file:
      # The root logger must let everything through, otherwise the file
      # handler attached below would miss the messages.
      root.setLevel(logging.DEBUG)

      rotating = logging.handlers.RotatingFileHandler(
          options.log_file,
          maxBytes=10 * 1024 * 1024,
          backupCount=5,
          encoding='utf-8')
      # log files are always at DEBUG level.
      rotating.setLevel(logging.DEBUG)
      rotating.setFormatter(logging.Formatter(
          '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
      root.addHandler(rotating)

    return options, leftover
2145
2146
def main():
  """Parses the command line, then either downloads or runs the isolated test.

  Exactly one of --isolated or --hash must be given and no positional argument
  is accepted. With --download, the files are fetched into the given directory
  and the test is not run; otherwise it is run through run_tha_test().

  Returns:
    The value returned by download_test_data() or run_tha_test(), or 1 when
    run_tha_test() raises an exception.
  """
  disable_buffering()
  parser = OptionParserWithLogging(
      usage='%prog <options>', log_file=RUN_ISOLATED_LOG_FILE)

  download_group = optparse.OptionGroup(parser, 'Download')
  download_group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(download_group)

  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  source_group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  parser.add_option_group(source_group)

  # Still belongs to the 'Data source' group; it is simply registered after the
  # group was attached to the parser.
  source_group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')

  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  cache_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  cache_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=2*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  cache_group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(cache_group)

  options, args = parser.parse_args()

  # Each failure is also sent to the log before parser.error() reports it.
  if bool(options.isolated) == bool(options.hash):
    message = 'One and only one of --isolated or --hash is required.'
    logging.debug(message)
    parser.error(message)
  if args:
    message = 'Unsupported args %s' % ' '.join(args)
    logging.debug(message)
    parser.error(message)

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  # Exactly one of the two is set at this point.
  isolated = options.isolated or options.hash

  if options.download:
    return download_test_data(isolated, options.download, options.remote)

  try:
    return run_tha_test(isolated, options.cache, options.remote, policies)
  except Exception as e:
    # Make sure any exception is logged.
    logging.exception(e)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00002229
2230
if __name__ == '__main__':
  # Ensure that we are always running with the correct encoding.
  fix_default_encoding()
  # The value returned by main() is used as the exit status of the process.
  sys.exit(main())
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00002234 sys.exit(main())