blob: 22738ce9a1b4fc60b7932c4f15ada96781810cac [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
13import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000014import httplib
maruel@chromium.orgedd25d02013-03-26 14:38:00 +000015import inspect
maruel@chromium.org2b2139a2013-04-30 20:14:58 +000016import itertools
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000017import json
csharp@chromium.orgbfb98742013-03-26 20:28:36 +000018import locale
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000019import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000020import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000021import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000022import optparse
23import os
24import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000025import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000026import re
27import shutil
28import stat
29import subprocess
30import sys
31import tempfile
32import threading
33import time
maruel@chromium.org97cd0be2013-03-13 14:01:36 +000034import traceback
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000035import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000036import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000037import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000038import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000039
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000040# Try to import 'upload' module used by AppEngineService for authentication.
41# If it is not there, app engine authentication support will be disabled.
42try:
43 from third_party import upload
44 # Hack out upload logging.info()
45 upload.logging = logging.getLogger('upload')
46 # Mac pylint choke on this line.
47 upload.logging.setLevel(logging.WARNING) # pylint: disable=E1103
48except ImportError:
49 upload = None
50
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000051
# Actions accepted by link_file().
HARDLINK, SYMLINK, COPY = 1, 2, 3

# Matches a string made of exactly 40 hexadecimal characters, i.e. a
# hex-encoded SHA-1 digest.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# Placeholder used when the real size of a file is not known; this is
# generally the case for .isolated files.
UNKNOWN_FILE_SIZE = None

# Number of bytes read per chunk when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 1024 * 16

# Name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# Directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Log file used for the run_test_cases.py command.
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# Delay (in seconds) between two logging statements while the required files
# are being retrieved. It lets the user (or buildbot) know that the program is
# still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# Name of the key under which the count of url attempts is stored.
COUNT_KEY = 'UrlOpenAttempt'

# Default maximum number of attempts at opening a url before aborting.
URL_OPEN_MAX_ATTEMPTS = 30
# Default total time (in seconds) allowed for retrying: 6 minutes.
URL_OPEN_TIMEOUT = 360.

# Global (for now) map: server URL (http://example.com) -> HttpService
# instance. It is the cache behind get_http_service(); accesses are guarded by
# _http_services_lock.
_http_services = {}
_http_services_lock = threading.Lock()

# Maps sys.platform values to flavor names. Used by get_flavor().
FLAVOR_MAPPING = dict(
    cygwin='win',
    win32='win',
    darwin='mac',
    sunos5='solaris',
    freebsd7='freebsd',
    freebsd8='freebsd',
)
100
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000101
class ConfigError(ValueError):
  """Raised when a .isolated file can't be parsed or fails validation."""
105
106
class MappingError(OSError):
  """Raised when the tree of files could not be recreated."""
110
111
def get_flavor():
  """Returns the flavor name of the current system.

  The value of sys.platform is looked up in FLAVOR_MAPPING; any platform that
  is not listed there is reported as 'linux'. Copied from
  gyp/pylib/gyp/common.py.
  """
  platform = sys.platform
  if platform in FLAVOR_MAPPING:
    return FLAVOR_MAPPING[platform]
  return 'linux'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000115
116
def fix_default_encoding():
  """Switches the interpreter default encoding and the locale to UTF-8.

  By default the python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Returns False when the default encoding already was utf-8 (nothing is
  touched in that case), True once the switch has been made.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Reloading sys regenerates setdefaultencoding().
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  # pylint: disable=E1101
  sys.setdefaultencoding('utf-8')

  category_names = [name for name in dir(locale) if name.startswith('LC_')]
  for name in category_names:
    category = getattr(locale, name)
    try:
      # Start from the user's default setting for this category.
      locale.setlocale(category, '')
    except locale.Error:
      continue
    try:
      language = locale.getlocale(category)[0]
    except (TypeError, ValueError):
      continue
    if not language:
      continue
    try:
      locale.setlocale(category, (language, 'UTF-8'))
    except locale.Error:
      # The locale could not be switched in-process; export the wish through
      # the environment instead.
      os.environ[name] = language + '.UTF-8'

  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    pass
  return True
155
156
class Unbuffered(object):
  """Wraps a file object so that it is flushed whenever a newline is written.

  Every attribute other than write() is forwarded to the wrapped stream.
  """

  def __init__(self, stream):
    self.stream = stream

  def __getattr__(self, attr):
    # Only called for attributes not found on the wrapper itself.
    return getattr(self.stream, attr)

  def write(self, data):
    """Writes |data| to the stream, flushing if it contains a newline."""
    self.stream.write(data)
    if '\n' not in data:
      return
    self.stream.flush()
169
170
def disable_buffering():
  """Makes stdout of this process and of its child processes unbuffered.

  Does nothing when the PYTHONUNBUFFERED environment variable is already set
  to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...
  # so the whole object is wrapped instead.
  sys.stdout = Unbuffered(sys.stdout)
  # Inherited by child processes.
  os.environ['PYTHONUNBUFFERED'] = 'x'
178
179
def os_link(source, link_name):
  """Creates a hardlink named |link_name| pointing to |source|.

  Adds support for os.link() on Windows, where CreateHardLinkW() is called
  through ctypes.

  Raises:
    OSError: the hardlink could not be created. On Windows the exception is
        built by ctypes.WinError() so it carries the system error code and
        message instead of being empty.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return

  if not ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0):
    # WinError() returns a WindowsError, which is a subclass of OSError, so
    # callers catching OSError keep working.
    raise ctypes.WinError()
188
189
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and makes the copy readable by everyone."""
  shutil.copy(infile, outfile)
  read_for_all = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  current_mode = os.stat(outfile).st_mode
  os.chmod(outfile, current_mode | read_for_all)
196
197
def link_file(outfile, infile, action):
  """Makes |infile| available as |outfile|; |action| selects how.

  |action| is one of HARDLINK, SYMLINK or COPY. A hardlink that can't be
  created falls back to a copy.

  Raises:
    ValueError: |action| is not a known action.
    MappingError: |infile| is not a file or |outfile| already is one.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    in_size = os.stat(infile).st_size
    out_size = os.stat(outfile).st_size
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, in_size, out_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # HARDLINK, or SYMLINK on Windows where symlinks are converted to hardlinks.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warning(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
224
225
226def _set_write_bit(path, read_only):
227 """Sets or resets the executable bit on a file or directory."""
228 mode = os.lstat(path).st_mode
229 if read_only:
230 mode = mode & 0500
231 else:
232 mode = mode | 0200
233 if hasattr(os, 'lchmod'):
234 os.lchmod(path, mode) # pylint: disable=E1101
235 else:
236 if stat.S_ISLNK(mode):
237 # Skip symlink without lchmod() support.
238 logging.debug('Can\'t change +w bit on symlink %s' % path)
239 return
240
241 # TODO(maruel): Implement proper DACL modification on Windows.
242 os.chmod(path, mode)
243
244
def make_writable(root, read_only):
  """Toggles the writable bit on everything found below |root|.

  |root| must be an absolute path. Each directory is visited before its
  children; within a directory the files are handled before the
  subdirectories. |root| itself is not modified.
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    for entry in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, entry), read_only)
254
255
def rmtree(root):
  """Deletes the |root| tree, retrying automatically on Windows.

  The tree is made writable first. On Windows the deletion is tried up to
  three times, sleeping 2, 4 then 6 seconds after each failure; if the last
  try fails too, the function returns without raising.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for delay in (2, 4, 6):
    try:
      shutil.rmtree(root)
      return
    except WindowsError:  # pylint: disable=E0602
      message = (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      sys.stderr.write(message + '\n')
      time.sleep(delay)
271
272
def is_same_filesystem(path1, path2):
  """Tells whether two absolute paths live on the same filesystem.

  Hardlinks can only be used when this is the case. Both paths must exist
  since they are stat'ed.
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    drive_prefix = r'^[a-zA-Z]\:\\.*'
    assert re.match(drive_prefix, path1), path1
    assert re.match(drive_prefix, path2), path2
    drive1 = path1[0].lower()
    drive2 = path2[0].lower()
    if drive1 != drive2:
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
289
290
def get_free_space(path):
  """Returns the number of free bytes on the filesystem holding |path|."""
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  # GetDiskFreeSpaceExW() writes its result into the ulonglong passed as the
  # last argument.
  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
301
302
def make_temp_dir(prefix, root_dir):
  """Creates a temporary directory on the same file system as |root_dir|.

  The system temporary directory is used when it shares its file system with
  |root_dir|; otherwise the directory is created next to |root_dir|, in its
  parent directory. Returns the path of the new directory.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    # None lets tempfile pick its default location.
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
309
310
def _is_sha1(value):
  """Returns True if |value| is a string holding a hex-encoded sha-1."""
  # The type check matters: RE_IS_SHA1.match() raises TypeError on anything
  # that is not a string, e.g. a number found in the json data.
  return isinstance(value, basestring) and bool(RE_IS_SHA1.match(value))


def _check_isolated_file_entry(properties):
  """Validates the properties of one entry of the 'files' dictionary.

  Raises ConfigError on an unknown property, a property of the wrong type, or
  when both 'h' (sha-1) and 'l' (link) are present.
  """
  for name, value in properties.iteritems():
    if name == 'l':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)
    elif name in ('m', 's'):
      if not isinstance(value, int):
        raise ConfigError('Expected int, got %r' % value)
    elif name == 'h':
      if not _is_sha1(value):
        raise ConfigError('Expected sha-1, got %r' % value)
    else:
      raise ConfigError('Unknown subsubkey %s' % name)
  if 'h' in properties and 'l' in properties:
    raise ConfigError(
        'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
        properties)


def load_isolated(content):
  """Parses the content of a .isolated file and verifies it is valid.

  Arguments:
    content: json encoded string, the content of the .isolated file.

  Returns:
    The decoded data as a dict. Only the keys 'command', 'files', 'includes',
    'os', 'read_only' and 'relative_cwd' are accepted; each is optional.

  Raises:
    ConfigError: the content is not valid json, is not a dict, has an unknown
        key, has a value of an unexpected type or shape, or its 'os' value
        differs from get_flavor().
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for argument in value:
        if not isinstance(argument, basestring):
          raise ConfigError('Expected string, got %r' % argument)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for filepath, properties in value.iteritems():
        if not isinstance(filepath, basestring):
          raise ConfigError('Expected string, got %r' % filepath)
        if not isinstance(properties, dict):
          raise ConfigError('Expected dict, got %r' % properties)
        _check_isolated_file_entry(properties)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for included in value:
        if not _is_sha1(included):
          raise ConfigError('Expected sha-1, got %r' % included)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      flavor = get_flavor()
      if value != flavor:
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (flavor, value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
388
389
def fix_python_path(cmd):
  """Returns a copy of |cmd| fixed to call the right python executable.

  A leading 'python' is replaced by sys.executable, and sys.executable is
  prepended when the first item is a .py script. |cmd| is left untouched.
  """
  program = cmd[0]
  if program == 'python':
    return [sys.executable] + cmd[1:]
  if program.endswith('.py'):
    return [sys.executable] + cmd
  return cmd[:]
398
399
def url_open(url, **kwargs):
  """Attempts to open the given url multiple times.

  The url is split into its server part and its request part; the request is
  then sent through the HttpService instance associated with that server.
  |kwargs| is forwarded as-is to HttpService.request().

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns a file-like object, where the response may be read from, or None
  if it was unable to connect.
  """
  urlhost, urlpath = split_server_request_url(url)
  return get_http_service(urlhost).request(urlpath, **kwargs)
415
416
def split_server_request_url(url):
  """Splits |url| into (scheme+netloc, path+params+query+fragment)."""
  parsed = urlparse.urlparse(url)
  scheme, netloc = parsed[0], parsed[1]
  # Blank out scheme and netloc so that only the request part is rebuilt.
  request_parts = ['', ''] + list(parsed[2:])
  return '%s://%s' % (scheme, netloc), urlparse.urlunparse(request_parts)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000423
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000424
def get_http_service(urlhost):
  """Returns the HttpService instance to use for the server |urlhost|.

  Instances are cached in _http_services, one per normalized urlhost; an
  AppEngineService is created the first time a server is seen.
  """
  # Ensure consistency: the cache key is lower case without trailing slash.
  key = str(urlhost).lower().rstrip('/')
  with _http_services_lock:
    if not _http_services.get(key):
      _http_services[key] = AppEngineService(key)
    return _http_services[key]
437
438
class HttpService(object):
  """Base class for a class that provides an API to HTTP based service:
    - Provides 'request' method.
    - Supports automatic request retries.
    - Supports persistent cookies.
    - Thread safe.
  """

  # File to use to store all auth cookies. '~' is expanded when the cookie jar
  # is loaded.
  COOKIE_FILE = os.path.join('~', '.isolated_cookies')

  # CookieJar reused by all services + lock that protects its instantiation.
  _cookie_jar = None
  _cookie_jar_lock = threading.Lock()

  def __init__(self, urlhost):
    """|urlhost| is the scheme+netloc part of the server url."""
    self.urlhost = urlhost
    self.cookie_jar = self.load_cookie_jar()
    self.opener = self.create_url_opener()

  def authenticate(self):  # pylint: disable=R0201
    """Called when HTTP server asks client to authenticate.

    Can be implemented in subclasses. Returns True when the authentication
    succeeded; this base implementation always returns False.
    """
    return False

  @staticmethod
  def load_cookie_jar():
    """Returns global CookieJar object that stores cookies in the file.

    The jar is created and loaded on the first call only.
    """
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        return HttpService._cookie_jar
      jar = ThreadSafeCookieJar(os.path.expanduser(HttpService.COOKIE_FILE))
      jar.load()
      HttpService._cookie_jar = jar
      return jar

  @staticmethod
  def save_cookie_jar():
    """Called when cookie jar needs to be flushed to disk."""
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        HttpService._cookie_jar.save()

  def create_url_opener(self):  # pylint: disable=R0201
    """Returns OpenerDirector that will be used when sending requests.

    Can be reimplemented in subclasses.
    """
    return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))

  def request(self, urlpath, data=None, content_type=None, **kwargs):
    """Attempts to open the given url multiple times.

    |urlpath| is relative to the server root, i.e. '/some/request?param=1'.

    |data| can be either:
      -None for a GET request
      -str for pre-encoded data
      -list for data to be encoded
      -dict for data to be encoded (COUNT_KEY will be added in this case)

    |content_type| is only valid when |data| is provided.

    |kwargs| is forwarded to _retry_loop(): max_attempts, retry_404,
    retry_50x and timeout.

    Returns a file-like object, where the response may be read from, or None
    if it was unable to connect.
    """
    assert urlpath and urlpath[0] == '/'

    if isinstance(data, dict) and COUNT_KEY in data:
      logging.error('%s already existed in the data passed into UlrOpen. It '
                    'would be overwritten. Aborting UrlOpen', COUNT_KEY)
      return None

    method = 'GET' if data is None else 'POST'
    assert not ((method != 'POST') and content_type), (
        'Can\'t use content_type on GET')

    def make_request(extra):
      """Returns a urllib2.Request instance for this specific retry.

      |extra| is a dict of additional parameters; it is merged in the encoded
      body when |data| is a list or a dict, in the url query otherwise.
      """
      if isinstance(data, str) or data is None:
        payload = data
      else:
        if isinstance(data, dict):
          payload = data.items()
        else:
          # Copy so the caller's list is not modified on each retry.
          payload = data[:]
        payload.extend(extra.iteritems())
        payload = urllib.urlencode(payload)
      new_url = urlparse.urljoin(self.urlhost, urlpath[1:])
      if isinstance(data, str) or data is None:
        # In these cases, add the extra parameter to the query part of the url.
        url_parts = list(urlparse.urlparse(new_url))
        # Append the query parameter.
        if url_parts[4] and extra:
          url_parts[4] += '&'
        url_parts[4] += urllib.urlencode(extra)
        new_url = urlparse.urlunparse(url_parts)
      request = urllib2.Request(new_url, data=payload)
      if payload is not None:
        if content_type:
          request.add_header('Content-Type', content_type)
        request.add_header('Content-Length', len(payload))
      return request

    return self._retry_loop(make_request, **kwargs)

  def _retry_loop(
      self,
      make_request,
      max_attempts=URL_OPEN_MAX_ATTEMPTS,
      retry_404=False,
      retry_50x=True,
      timeout=URL_OPEN_TIMEOUT):
    """Runs internal request-retry loop.

    - Optionally retries HTTP 404 and 50x.
    - Retries up to |max_attempts| times. If None or 0, there's no limit in the
      number of retries.
    - Retries up to |timeout| duration in seconds. If None or 0, there's no
      limit in the time taken to do retries.
    - If both |max_attempts| and |timeout| are None or 0, this functions retries
      indefinitely.
    - Sleeps between two attempts, but never after the last one nor before
      retrying right after a successful authentication.

    |make_request| is called with a dict of extra parameters (COUNT_KEY for
    every attempt but the first) and must return the request to send.

    Returns the response object on success, None on failure.
    """
    authenticated = False
    last_error = None
    # Last request sent, kept for the final error message. It stays None when
    # the loop exits before any request is made.
    request = None
    attempts_made = 0
    start = self._now()
    for attempt in itertools.count():
      if max_attempts and attempt >= max_attempts:
        # Too many attempts.
        break
      if timeout and (self._now() - start) >= timeout:
        # Retried for too long.
        break
      extra = {COUNT_KEY: attempt} if attempt else {}
      request = make_request(extra)
      attempts_made += 1
      try:
        url_response = self._url_open(request)
        logging.debug('url_open(%s) succeeded', request.get_full_url())
        return url_response
      except urllib2.HTTPError as e:
        # Unauthorized. Ask to authenticate and then try again.
        if e.code in (401, 403):
          # Try to authenticate only once. If it doesn't help, then server does
          # not support app engine authentication.
          logging.error(
              'Authentication is required for %s on attempt %d.\n%s',
              request.get_full_url(), attempt,
              self._format_exception(e, verbose=True))
          if not authenticated and self.authenticate():
            authenticated = True
            # Do not sleep.
            continue
          # If authentication failed, return.
          logging.error(
              'Unable to authenticate to %s.\n%s',
              request.get_full_url(), self._format_exception(e, verbose=True))
          return None

        if ((e.code < 500 and not (retry_404 and e.code == 404)) or
            (e.code >= 500 and not retry_50x)):
          # This HTTPError means we reached the server and there was a problem
          # with the request, so don't retry.
          logging.error(
              'Able to connect to %s but an exception was thrown.\n%s',
              request.get_full_url(), self._format_exception(e, verbose=True))
          return None

        # The HTTPError was due to a server error, so retry the attempt.
        logging.warning('Able to connect to %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      except (urllib2.URLError, httplib.HTTPException) as e:
        logging.warning('Unable to open url %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      # Only sleep if we are going to try again. |attempt| is 0-based so the
      # last allowed attempt is max_attempts - 1.
      if max_attempts and attempt + 1 >= max_attempts:
        break
      remaining = None
      if timeout:
        remaining = timeout - (self._now() - start)
        if remaining <= 0:
          break
      self.sleep_before_retry(attempt, remaining)

    # Report the number of attempts really made: |max_attempts| may be None
    # and the loop may have stopped early because of |timeout|.
    logging.error('Unable to open given url, %s, after %d attempts.\n%s',
                  request.get_full_url() if request is not None else None,
                  attempts_made,
                  self._format_exception(last_error, verbose=True))
    return None

  def _url_open(self, request):
    """Low level method to execute urllib2.Request's.

    To be mocked in tests.
    """
    return self.opener.open(request)

  @staticmethod
  def _now():
    """To be mocked in tests."""
    return time.time()

  @staticmethod
  def calculate_sleep_before_retry(attempt, max_duration):
    """Returns the time in seconds to sleep before retrying.

    The duration grows exponentially with |attempt| (0-based) plus a random
    jitter. It is capped at 10 seconds and, when |max_duration| is truthy, at
    |max_duration|.
    """
    # Maximum sleeping time. We're hammering a cloud-distributed service, it'll
    # survive.
    MAX_SLEEP = 10.
    # random.random() returns [0.0, 1.0). Starts with relatively short waiting
    # time by starting with 1.5/2+1.5^-1 median offset.
    duration = (random.random() * 1.5) + math.pow(1.5, (attempt - 1))
    assert duration > 0.1
    duration = min(MAX_SLEEP, duration)
    if max_duration:
      duration = min(max_duration, duration)
    return duration

  @classmethod
  def sleep_before_retry(cls, attempt, max_duration):
    """Sleeps for some amount of time when retrying the request.

    To be mocked in tests.
    """
    time.sleep(cls.calculate_sleep_before_retry(attempt, max_duration))

  @staticmethod
  def _format_exception(exc, verbose=False):
    """Given an instance of some exception raised by urlopen returns human
    readable piece of text with detailed information about the error.

    With |verbose|, the headers (except the ones starting with 'x-') and the
    body of an urllib2.HTTPError are included. The body is read from |exc|.
    """
    out = ['Exception: %s' % (exc,)]
    if verbose:
      if isinstance(exc, urllib2.HTTPError):
        out.append('-' * 10)
        if exc.hdrs:
          for header, value in exc.hdrs.items():
            if not header.startswith('x-'):
              out.append('%s: %s' % (header.capitalize(), value))
          out.append('')
        out.append(exc.read() or '<empty body>')
        out.append('-' * 10)
    return '\n'.join(out)
681
maruel@chromium.orgef333122013-03-12 20:36:40 +0000682
class AppEngineService(HttpService):
  """HttpService with authentication support for app engine based services.

  Authentication relies on the optional 'upload' module.
  """

  # Serializes authentication attempts, so the user won't be confused with
  # multiple concurrent login prompts.
  _auth_lock = threading.Lock()

  def __init__(self, urlhost, email=None, password=None):
    super(AppEngineService, self).__init__(urlhost)
    self.email = email
    self.password = password
    # Created lazily by get_credentials().
    self._keyring = None

  def authenticate(self):
    """Authenticates in the app engine application.

    Returns True on success, False when it failed or when the 'upload' module
    is not available.
    """
    if not upload:
      logging.error('\'upload\' module is missing, '
                    'app engine authentication is disabled.')
      return False

    # Bound to locals so the nested class below can reach them; inside it
    # 'self' is the AuthServer instance.
    jar = self.cookie_jar
    flush_jar = self.save_cookie_jar

    # RPC server that uses AuthenticationSupport's cookie jar.
    class AuthServer(upload.AbstractRpcServer):
      def _GetOpener(self):
        # Authentication code needs to know about 302 response.
        # So make OpenerDirector without HTTPRedirectHandler.
        opener = urllib2.OpenerDirector()
        handler_classes = (
            urllib2.ProxyHandler,
            urllib2.UnknownHandler,
            urllib2.HTTPHandler,
            urllib2.HTTPDefaultErrorHandler,
            urllib2.HTTPSHandler,
            urllib2.HTTPErrorProcessor,
        )
        for handler_class in handler_classes:
          opener.add_handler(handler_class())
        opener.add_handler(urllib2.HTTPCookieProcessor(jar))
        return opener

      def PerformAuthentication(self):
        self._Authenticate()
        flush_jar()
        return self.authenticated

    with AppEngineService._auth_lock:
      return AuthServer(
          self.urlhost, self.get_credentials).PerformAuthentication()

  def get_credentials(self):
    """Called during authentication process to get the credentials.

    May be called multiple times if authentication fails.
    Returns tuple (email, password).
    """
    # 'authenticate' calls this only if 'upload' is present.
    # Ensure other callers (if any) fail non-cryptically if 'upload' is missing.
    assert upload, '\'upload\' module is required for this to work'
    if self.email and self.password:
      return (self.email, self.password)
    if not self._keyring:
      # The server url is used for both leading arguments.
      host = self.urlhost
      self._keyring = upload.KeyringCreds(host, host, self.email)
    return self._keyring.GetUserCredentials()
745
746
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save.

  Both operations run while holding the jar's own |_cookies_lock| and are
  best-effort: file system failures are logged instead of being raised.
  """

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists.

    When the file does not exist, an empty one is created instead. In both
    cases the file mode is then forced to 0600 so only the owner can read the
    cookies.

    Arguments:
    - filename: file to read; defaults to self.filename.
    - ignore_discard, ignore_expires: passed to MozillaCookieJar.load().
    """
    filename = filename or self.filename
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(
              self, filename, ignore_discard, ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError):
          # An unreadable or malformed file is treated as an empty jar. This
          # is expected for the empty file created below, hence debug level.
          logging.debug('Could not load cookies from %s', filename)
      else:
        try:
          fd = os.open(filename, os.O_CREAT, 0o600)
          os.close(fd)
        except OSError:
          logging.error('Failed to create %s', filename)
      try:
        os.chmod(filename, 0o600)
      except OSError:
        logging.error('Failed to fix mode for %s', filename)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it.

    Arguments:
    - filename: file to write; defaults to self.filename.
    - ignore_discard, ignore_expires: passed to MozillaCookieJar.save().
    """
    # Resolve the effective path once so the log lines never print 'None'.
    target = filename or self.filename
    logging.debug('Saving cookies to %s', target)
    with self._cookies_lock:
      try:
        cookielib.MozillaCookieJar.save(
            self, filename, ignore_discard, ignore_expires)
      except (IOError, OSError):
        # On Python 2 a failing open() raises IOError, which is not an OSError.
        logging.error('Failed to save %s', target)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000783
784
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000785class ThreadPool(object):
786 """Implements a multithreaded worker pool oriented for mapping jobs with
787 thread-local result storage.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000788
789 Arguments:
790 - initial_threads: Number of threads to start immediately. Can be 0 if it is
791 uncertain that threads will be needed.
792 - max_threads: Maximum number of threads that will be started when all the
793 threads are busy working. Often the number of CPU cores.
794 - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
795 queue. A non-zero value may make add_task() blocking.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000796 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000797 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000798
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000799 def __init__(self, initial_threads, max_threads, queue_size):
800 logging.debug(
801 'ThreadPool(%d, %d, %d)', initial_threads, max_threads, queue_size)
802 assert initial_threads <= max_threads
803 # Update this check once 256 cores CPU are common.
804 assert max_threads <= 256
805
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000806 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000807 self._max_threads = max_threads
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000808
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000809 # Mutables.
810 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000811 self._num_of_added_tasks = 0
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000812 self._outputs_exceptions_cond = threading.Condition()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000813 self._outputs = []
814 self._exceptions = []
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000815 # Number of threads in wait state.
816 self._ready_lock = threading.Lock()
817 self._ready = 0
818 self._workers_lock = threading.Lock()
819 self._workers = []
820 for _ in range(initial_threads):
821 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000822
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000823 def _add_worker(self):
824 """Adds one worker thread if there isn't too many. Thread-safe."""
825 # Better to take the lock two times than hold it for too long.
826 with self._workers_lock:
827 if len(self._workers) >= self._max_threads:
828 return False
829 worker = threading.Thread(target=self._run)
830 with self._workers_lock:
831 if len(self._workers) >= self._max_threads:
832 return False
833 self._workers.append(worker)
834 worker.daemon = True
835 worker.start()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000836
maruel@chromium.org831958f2013-01-22 15:01:46 +0000837 def add_task(self, priority, func, *args, **kwargs):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000838 """Adds a task, a function to be executed by a worker.
839
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000840 |priority| can adjust the priority of the task versus others. Lower priority
maruel@chromium.org831958f2013-01-22 15:01:46 +0000841 takes precedence.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000842
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000843 |func| can either return a return value to be added to the output list or
844 be a generator which can emit multiple values.
845
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000846 Returns the index of the item added, e.g. the total number of enqueued items
847 up to now.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000848 """
maruel@chromium.org831958f2013-01-22 15:01:46 +0000849 assert isinstance(priority, int)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000850 assert callable(func)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000851 with self._ready_lock:
852 start_new_worker = not self._ready
853 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000854 self._num_of_added_tasks += 1
855 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000856 self.tasks.put((priority, index, func, args, kwargs))
857 if start_new_worker:
858 self._add_worker()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000859 return index
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000860
861 def _run(self):
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000862 """Worker thread loop. Runs until a None task is queued."""
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000863 while True:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000864 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000865 with self._ready_lock:
866 self._ready += 1
867 task = self.tasks.get()
868 finally:
869 with self._ready_lock:
870 self._ready -= 1
871 try:
872 if task is None:
873 # We're done.
874 return
875 _priority, _index, func, args, kwargs = task
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000876 if inspect.isgeneratorfunction(func):
877 for out in func(*args, **kwargs):
878 self._output_append(out)
879 else:
880 out = func(*args, **kwargs)
881 self._output_append(out)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000882 except Exception as e:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000883 logging.warning('Caught exception: %s', e)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000884 exc_info = sys.exc_info()
maruel@chromium.org97cd0be2013-03-13 14:01:36 +0000885 logging.info(''.join(traceback.format_tb(exc_info[2])))
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000886 self._outputs_exceptions_cond.acquire()
887 try:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000888 self._exceptions.append(exc_info)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000889 self._outputs_exceptions_cond.notifyAll()
890 finally:
891 self._outputs_exceptions_cond.release()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000892 finally:
csharp@chromium.org60991182013-03-18 13:44:17 +0000893 try:
894 self.tasks.task_done()
895 except Exception as e:
896 # We need to catch and log this error here because this is the root
897 # function for the thread, nothing higher will catch the error.
898 logging.exception('Caught exception while marking task as done: %s',
899 e)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000900
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000901 def _output_append(self, out):
902 if out is not None:
903 self._outputs_exceptions_cond.acquire()
904 try:
905 self._outputs.append(out)
906 self._outputs_exceptions_cond.notifyAll()
907 finally:
908 self._outputs_exceptions_cond.release()
909
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000910 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000911 """Extracts all the results from each threads unordered.
912
913 Call repeatedly to extract all the exceptions if desired.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000914
915 Note: will wait for all work items to be done before returning an exception.
916 To get an exception early, use get_one_result().
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000917 """
918 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000919 self.tasks.join()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000920 self._outputs_exceptions_cond.acquire()
921 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000922 if self._exceptions:
923 e = self._exceptions.pop(0)
924 raise e[0], e[1], e[2]
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000925 out = self._outputs
926 self._outputs = []
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000927 finally:
928 self._outputs_exceptions_cond.release()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000929 return out
930
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000931 def get_one_result(self):
932 """Returns the next item that was generated or raises an exception if one
933 occured.
934
935 Warning: this function will hang if there is no work item left. Use join
936 instead.
937 """
938 self._outputs_exceptions_cond.acquire()
939 try:
940 while True:
941 if self._exceptions:
942 e = self._exceptions.pop(0)
943 raise e[0], e[1], e[2]
944 if self._outputs:
945 return self._outputs.pop(0)
946 self._outputs_exceptions_cond.wait()
947 finally:
948 self._outputs_exceptions_cond.release()
949
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000950 def close(self):
951 """Closes all the threads."""
952 for _ in range(len(self._workers)):
953 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000954 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000955 for t in self._workers:
956 t.join()
957
958 def __enter__(self):
959 """Enables 'with' statement."""
960 return self
961
maruel@chromium.org97cd0be2013-03-13 14:01:36 +0000962 def __exit__(self, _exc_type, _exc_value, _traceback):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000963 """Enables 'with' statement."""
964 self.close()
965
966
def valid_file(filepath, size):
  """Tells whether the given file appears valid.

  Currently only the size on disk is compared with |size|. When |size| is
  UNKNOWN_FILE_SIZE nothing can be checked and the file is accepted without
  being looked at.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  size_on_disk = os.stat(filepath).st_size
  if size == size_on_disk:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), size_on_disk, size)
  return False
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000979
980
class Profiler(object):
  """Context manager logging how long the enclosed section took to run."""

  def __init__(self, name):
    # |start_time| is only set once the 'with' block is entered.
    self.start_time = None
    self.name = name

  def __enter__(self):
    """Starts the clock."""
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Logs the elapsed wall clock time; exceptions are not suppressed."""
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
994
995
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priority of items is equal, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The low 8 bits are reserved to count the retries.
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    # Function to fetch a remote object or upload to a remote location..
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = ThreadPool(self.INITIAL_WORKERS, self.MAX_WORKERS, 0)

  def join(self):
    """Blocks until the queue is empty."""
    return self._pool.join()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first.

    Thread-safe.
    """
    # Callers must leave the retry counter bits clear.
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Enqueues the transfer; unlike add_item(), accepts retry priorities."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns the next completed item; see ThreadPool.get_one_result()."""
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success. Returns None when the item was re-enqueued for
    another attempt. Re-raises the IOError once RETRIES attempts failed.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Retry a few times, lowering the priority.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a object to retrieve objects from a remote.

    The returned callable takes (item, dest). For an http(s) URL it downloads
    and zlib-decompresses |file_or_url| + item into |dest|; otherwise it
    copies the file from the directory |file_or_url|.
    """
    if re.match(r'^https?://.+$', file_or_url):
      def download_file(item, dest):
        # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
        # easy.
        try:
          zipped_source = file_or_url + item
          logging.debug('download_file(%s)', zipped_source)

          # Because the app engine DB is only eventually consistent, retry
          # 404 errors because the file might just not be visible yet (even
          # though it has been uploaded).
          connection = url_open(zipped_source, retry_404=True)
          if not connection:
            raise IOError('Unable to open connection to %s' % zipped_source)
          decompressor = zlib.decompressobj()
          size = 0
          with open(dest, 'wb') as f:
            while True:
              chunk = connection.read(ZIPPED_FILE_CHUNK)
              if not chunk:
                break
              size += len(chunk)
              f.write(decompressor.decompress(chunk))
            # flush() returns any output still buffered in the decompressor.
            # It belongs to the file, so write it instead of asserting that
            # it is empty.
            f.write(decompressor.flush())
        except IOError:
          logging.error('Encountered an exception with (%s, %s)', item, dest)
          raise
        except httplib.HTTPException as e:
          # Converted to IOError so that _task_executer() retries it.
          raise IOError('Encountered an HTTPException.\n%s' % e)
        except zlib.error as e:
          # Drain the connection to report how much data was left unprocessed.
          remaining_size = len(connection.read())
          msg = ('Problem unzipping data for item %s. Processed %d of %d bytes.'
                 '\n%s' % (item, size, size + remaining_size, e))
          logging.error(msg)

          # Testing seems to show that if a few machines are trying to download
          # the same blob, they can cause each other to fail. So if we hit a
          # zip error, this is the most likely cause (it only downloads some of
          # the data). Randomly sleep for between 5 and 25 seconds to try and
          # spread out the downloads.
          # TODO(csharp): Switch from blobstorage to cloud storage and see if
          # that solves the issue.
          sleep_duration = (random.random() * 20) + 5
          time.sleep(sleep_duration)

          raise IOError(msg)

      return download_file

    def copy_file(item, dest):
      source = os.path.join(file_or_url, item)
      if source == dest:
        logging.info('Source and destination are the same, no action required')
        return
      logging.debug('copy_file(%s, %s)', source, dest)
      shutil.copy(source, dest)
    return copy_file
1128
1129
class CachePolicies(object):
  def __init__(self, max_cache_size, min_free_space, max_items):
    """Plain holder for the limits the cache has to respect.

    Arguments:
    - max_cache_size: Size above which the cache is trimmed. 0 disables the
                      check, which makes the cache effectively a leak.
    - min_free_space: Free disk space below which the cache is trimmed. 0
                      disables the check, so the disk may fill up.
    - max_items: Number of items above which the cache is trimmed. 0 means no
                 limit is enforced.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
1143 self.max_items = max_items
1144
1145
class NoCache(object):
  """This class is intended to be usable everywhere the Cache class is.
  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    """
    Arguments:
    - target_directory: Directory the files are downloaded into.
    - remote: Remote where to fetch items from.
    """
    self.target_directory = target_directory
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Get the request file.

    Blocks until the remote reports one result.
    """
    self.remote.add_item(priority, item, self.path(item), size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Download the first item of the given list if it is missing.

    |items| may be any iterable (a dict yields its keys), which matches what
    Cache.wait_for() accepts. Returns that first item. Raises StopIteration
    when |items| is empty.
    """
    item = next(iter(items))
    destination = self.path(item)

    if not os.path.exists(destination):
      self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
      downloaded = self.remote.get_one_result()
      assert downloaded == item

    return item

  def path(self, item):
    """Returns the path where |item| is stored in the target directory."""
    return os.path.join(self.target_directory, item)
1174
1175
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001176class Cache(object):
1177 """Stateful LRU cache.
1178
1179 Saves its state as json file.
1180 """
1181 STATE_FILE = 'state.json'
1182
1183 def __init__(self, cache_dir, remote, policies):
1184 """
1185 Arguments:
1186 - cache_dir: Directory where to place the cache.
1187 - remote: Remote where to fetch items from.
1188 - policies: cache retention policies.
1189 """
1190 self.cache_dir = cache_dir
1191 self.remote = remote
1192 self.policies = policies
1193 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
1194 # The tuple(file, size) are kept as an array in a LRU style. E.g.
1195 # self.state[0] is the oldest item.
1196 self.state = []
maruel@chromium.org770993b2012-12-11 17:16:48 +00001197 self._state_need_to_be_saved = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001198 # A lookup map to speed up searching.
1199 self._lookup = {}
maruel@chromium.org770993b2012-12-11 17:16:48 +00001200 self._lookup_is_stale = True
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001201
1202 # Items currently being fetched. Keep it local to reduce lock contention.
1203 self._pending_queue = set()
1204
1205 # Profiling values.
1206 self._added = []
1207 self._removed = []
1208 self._free_disk = 0
1209
maruel@chromium.org770993b2012-12-11 17:16:48 +00001210 with Profiler('Setup'):
1211 if not os.path.isdir(self.cache_dir):
1212 os.makedirs(self.cache_dir)
1213 if os.path.isfile(self.state_file):
1214 try:
1215 self.state = json.load(open(self.state_file, 'r'))
1216 except (IOError, ValueError), e:
1217 # Too bad. The file will be overwritten and the cache cleared.
1218 logging.error(
1219 'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
1220 self._state_need_to_be_saved = True
1221 if (not isinstance(self.state, list) or
1222 not all(
1223 isinstance(i, (list, tuple)) and len(i) == 2
1224 for i in self.state)):
1225 # Discard.
1226 self._state_need_to_be_saved = True
1227 self.state = []
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001228
maruel@chromium.org770993b2012-12-11 17:16:48 +00001229 # Ensure that all files listed in the state still exist and add new ones.
1230 previous = set(filename for filename, _ in self.state)
1231 if len(previous) != len(self.state):
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001232 logging.warning('Cache state is corrupted, found duplicate files')
maruel@chromium.org770993b2012-12-11 17:16:48 +00001233 self._state_need_to_be_saved = True
1234 self.state = []
1235
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001236 added = 0
1237 for filename in os.listdir(self.cache_dir):
1238 if filename == self.STATE_FILE:
1239 continue
1240 if filename in previous:
1241 previous.remove(filename)
1242 continue
1243 # An untracked file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001244 if not RE_IS_SHA1.match(filename):
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001245 logging.warning('Removing unknown file %s from cache', filename)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001246 os.remove(self.path(filename))
maruel@chromium.org770993b2012-12-11 17:16:48 +00001247 continue
1248 # Insert as the oldest file. It will be deleted eventually if not
1249 # accessed.
1250 self._add(filename, False)
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001251 logging.warning('Add unknown file %s to cache', filename)
maruel@chromium.org770993b2012-12-11 17:16:48 +00001252 added += 1
1253
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001254 if added:
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001255 logging.warning('Added back %d unknown files', added)
maruel@chromium.org770993b2012-12-11 17:16:48 +00001256 if previous:
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001257 logging.warning('Removed %d lost files', len(previous))
maruel@chromium.org770993b2012-12-11 17:16:48 +00001258 # Set explicitly in case self._add() wasn't called.
1259 self._state_need_to_be_saved = True
1260 # Filter out entries that were not found while keeping the previous
1261 # order.
1262 self.state = [
1263 (filename, size) for filename, size in self.state
1264 if filename not in previous
1265 ]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001266 self.trim()
1267
1268 def __enter__(self):
1269 return self
1270
1271 def __exit__(self, _exc_type, _exec_value, _traceback):
1272 with Profiler('CleanupTrimming'):
1273 self.trim()
1274
1275 logging.info(
maruel@chromium.org5fd6f472012-12-11 00:26:08 +00001276 '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001277 logging.info(
maruel@chromium.org5fd6f472012-12-11 00:26:08 +00001278 '%5d (%8dkb) current',
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001279 len(self.state),
1280 sum(i[1] for i in self.state) / 1024)
1281 logging.info(
maruel@chromium.org5fd6f472012-12-11 00:26:08 +00001282 '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
1283 logging.info(' %8dkb free', self._free_disk / 1024)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001284
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001285 def remove_file_at_index(self, index):
1286 """Removes the file at the given index."""
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001287 try:
maruel@chromium.org770993b2012-12-11 17:16:48 +00001288 self._state_need_to_be_saved = True
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001289 filename, size = self.state.pop(index)
maruel@chromium.org770993b2012-12-11 17:16:48 +00001290 # If the lookup was already stale, its possible the filename was not
1291 # present yet.
1292 self._lookup_is_stale = True
1293 self._lookup.pop(filename, None)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001294 self._removed.append(size)
1295 os.remove(self.path(filename))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001296 except OSError as e:
1297 logging.error('Error attempting to delete a file\n%s' % e)
1298
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001299 def remove_lru_file(self):
1300 """Removes the last recently used file."""
1301 self.remove_file_at_index(0)
1302
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001303 def trim(self):
1304 """Trims anything we don't know, make sure enough free space exists."""
1305 # Ensure maximum cache size.
1306 if self.policies.max_cache_size and self.state:
1307 while sum(i[1] for i in self.state) > self.policies.max_cache_size:
1308 self.remove_lru_file()
1309
1310 # Ensure maximum number of items in the cache.
1311 if self.policies.max_items and self.state:
1312 while len(self.state) > self.policies.max_items:
1313 self.remove_lru_file()
1314
1315 # Ensure enough free space.
1316 self._free_disk = get_free_space(self.cache_dir)
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001317 trimmed_due_to_space = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001318 while (
1319 self.policies.min_free_space and
1320 self.state and
1321 self._free_disk < self.policies.min_free_space):
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001322 trimmed_due_to_space = True
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001323 self.remove_lru_file()
1324 self._free_disk = get_free_space(self.cache_dir)
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001325 if trimmed_due_to_space:
1326 total = sum(i[1] for i in self.state)
1327 logging.warning(
1328 'Trimmed due to not enough free disk space: %.1fkb free, %.1fkb '
1329 'cache (%.1f%% of its maximum capacity)',
1330 self._free_disk / 1024.,
1331 total / 1024.,
1332 100. * self.policies.max_cache_size / float(total),
1333 )
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001334 self.save()
1335
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001336 def retrieve(self, priority, item, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001337 """Retrieves a file from the remote, if not already cached, and adds it to
1338 the cache.
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001339
1340 If the file is in the cache, verifiy that the file is valid (i.e. it is
1341 the correct size), retrieving it again if it isn't.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001342 """
1343 assert not '/' in item
1344 path = self.path(item)
maruel@chromium.org770993b2012-12-11 17:16:48 +00001345 self._update_lookup()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001346 index = self._lookup.get(item)
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001347
1348 if index is not None:
1349 if not valid_file(self.path(item), size):
1350 self.remove_file_at_index(index)
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001351 index = None
1352 else:
1353 assert index < len(self.state)
1354 # Was already in cache. Update it's LRU value by putting it at the end.
maruel@chromium.org770993b2012-12-11 17:16:48 +00001355 self._state_need_to_be_saved = True
1356 self._lookup_is_stale = True
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001357 self.state.append(self.state.pop(index))
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001358
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001359 if index is None:
1360 if item in self._pending_queue:
1361 # Already pending. The same object could be referenced multiple times.
1362 return
maruel@chromium.org9e98e432013-05-31 17:06:51 +00001363 # TODO(maruel): It should look at the free disk space, the current cache
1364 # size and the size of the new item on every new item:
1365 # - Trim the cache as more entries are listed when free disk space is low,
1366 # otherwise if the amount of data downloaded during the run > free disk
1367 # space, it'll crash.
1368 # - Make sure there's enough free disk space to fit all dependencies of
1369 # this run! If not, abort early.
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +00001370 self.remote.add_item(priority, item, path, size)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001371 self._pending_queue.add(item)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001372
def add(self, filepath, obj):
  """Forcibly registers |filepath| in the cache under the key |obj|.

  Nothing happens when |obj| is already listed in the lookup table.
  """
  self._update_lookup()
  if obj in self._lookup:
    return
  # Not known yet: link between the cache entry path and |filepath|, then
  # record the entry at the end of the LRU state.
  link_file(self.path(obj), filepath, HARDLINK)
  self._add(obj, True)
1379
def path(self, item):
  """Returns the location of |item| inside the cache directory."""
  cache_root = self.cache_dir
  return os.path.join(cache_root, item)
1383
def save(self):
  """Saves the LRU ordering.

  Serializes self.state as compact JSON into self.state_file, but only when
  the state was flagged as modified since the last save. The flag is cleared
  once the write completed; if the write raises, the flag stays set.
  """
  if not self._state_need_to_be_saved:
    return
  # Close the file deterministically so the content is flushed before the
  # dirty flag is reset, instead of relying on garbage collection.
  with open(self.state_file, 'wb') as f:
    json.dump(self.state, f, separators=(',',':'))
  self._state_need_to_be_saved = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001389
def wait_for(self, items):
  """Blocks until at least one of |items| is available and returns it.

  An item already present in the lookup table is returned right away.
  Otherwise results are pulled from self.remote one at a time; each result is
  registered in the internal state, and the first one that belongs to |items|
  is returned. Returns None implicitly if the pending queue drains without
  producing any of |items|.
  """
  # Flush items already present.
  self._update_lookup()
  for candidate in items:
    if candidate in self._lookup:
      return candidate

  # Everything asked for must have been requested beforehand.
  not_requested = [i for i in items if i not in self._pending_queue]
  assert not not_requested, (items, self._pending_queue)
  # Note that:
  #   len(self._pending_queue) ==
  #   ( len(self.remote._workers) - self.remote._ready +
  #     len(self.remote._queue) + len(self.remote.done))
  # There is no lock-free way to verify that.
  while self._pending_queue:
    fetched = self.remote.get_one_result()
    self._pending_queue.remove(fetched)
    self._add(fetched, True)
    if fetched in items:
      return fetched
1414
def _add(self, item, at_end):
  """Records |item| and its on-disk size in the internal state.

  With |at_end| True the entry is appended and the lookup table is updated in
  place. With |at_end| False the entry is inserted at the front, which shifts
  every index: self._lookup becomes inconsistent and self._update_lookup()
  must be called before it is used again.
  """
  entry_path = self.path(item)
  file_size = os.stat(entry_path).st_size
  self._added.append(file_size)
  self._state_need_to_be_saved = True
  entry = (item, file_size)
  if not at_end:
    self._lookup_is_stale = True
    self.state.insert(0, entry)
    return
  self.state.append(entry)
  self._lookup[item] = len(self.state) - 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001430
def _update_lookup(self):
  """Rebuilds the name -> index table from self.state if it is stale."""
  if not self._lookup_is_stale:
    return
  self._lookup = {
      name: position for position, (name, _) in enumerate(self.state)}
  self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001436
1437
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash

    # Raw data, as returned by load_isolated().
    self.data = {}
    # A IsolatedFile instance, one per entry of the 'includes' key.
    self.children = []

    # Becomes True once everything on the left side of the tree is parsed.
    # 'Tree' means this .isolated file plus every .isolated file recursively
    # reachable through 'includes'. The order of the hashes in 'includes'
    # matters: later ones are not processed before the earlier ones were
    # retrieved and read.
    self.can_fetch = False
    # Becomes True once load() has run.
    self._is_parsed = False
    # Becomes True once fetch_files() has requested the files.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| as a .isolated file and populates this object.

    Validation is delegated to load_isolated(). One child IsolatedFile is
    created per hash listed under 'includes'. Must be called only once.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    parsed = load_isolated(content)
    self.data = parsed
    self.children = [
        IsolatedFile(child_hash) for child_hash in parsed.get('includes', [])]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Merges this file's 'files' entries into |files| and requests them.

    Entries whose path already exists in |files| are left alone, so whoever
    registered a path first wins. Each newly added entry that carries a 'h'
    key is requested from |cache| right away.

    Note that |files| is modified by this function. Does nothing if this
    object was not loaded yet or if the files were already requested.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # The root isolated has priority on the files being mapped. In
      # particular, overridden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' not in properties:
        continue
      # Preemptively request the content.
      logging.debug('fetching %s' % filepath)
      cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1492
1493
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Arguments:
      cache: object providing retrieve(), wait_for() and path().
      root_isolated_hash: hash of the main .isolated file.

    Raises:
      ConfigError: an .isolated file is referenced by 'includes' after it was
          already requested during this load, e.g. because of an include
          cycle.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Keeps every hash requested so far to refuse recursive includes.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node providing a value wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file as soon as it is read instead of leaking the handle.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        # Remember the hash; otherwise a cycle that does not go through the
        # root would be requested and loaded over and over again.
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)
    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    # Normalize the values that no .isolated file provided.
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1575
1576
def create_directories(base_directory, files):
  """Creates, under |base_directory|, every directory |files| lives in.

  |files| is an iterable of relative file paths that also supports len().
  Each parent directory, and each of its ancestors, is created exactly once
  with os.mkdir(); parents are created before their children.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect each file's directory together with all of its ancestors.
  needed = set()
  for filepath in files:
    parent = os.path.dirname(filepath)
    while parent:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # Sorted order puts a directory ahead of anything nested inside it.
  for relpath in sorted(needed):
    os.mkdir(os.path.join(base_directory, relpath))
1589
1590
def create_links(base_directory, files):
  """Creates the symlinks described by |files| under |base_directory|.

  |files| is an iterable of (filepath, properties) pairs. Only entries with a
  'l' property are handled; its value is used as the symlink target. On
  win32 such entries are skipped with a warning. When a 'm' property is
  present and os.lchmod exists, it is applied to the created link.
  """
  on_windows = sys.platform == 'win32'
  for filepath, properties in files:
    if 'l' not in properties:
      continue
    if on_windows:
      # TODO(maruel): Create junctions or empty text files similar to what
      # cygwin do?
      logging.warning('Ignoring symlink %s', filepath)
      continue
    link_path = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'link' property should
    # never be specified for windows .isolated file.
    os.symlink(properties['l'], link_path)  # pylint: disable=E1101
    if 'm' not in properties:
      continue
    # lchmod is not available on every platform; skip silently without it.
    lchmod = getattr(os, 'lchmod', None)
    if lchmod:
      lchmod(link_path, properties['m'])
1609
1610
def setup_commands(base_directory, cwd, cmd):
  """Returns the (working directory, command) pair needed to run the test.

  |cwd| must be relative; it is resolved against |base_directory| and the
  resulting directory is created when missing. The first element of |cmd| is
  rewritten in place to use the native path separator, then the command goes
  through fix_python_path().
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  work_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(work_dir):
    os.makedirs(work_dir)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0]
  cmd[0] = executable.replace('/', os.path.sep)
  return work_dir, fix_python_path(cmd)
1625
1626
def generate_remaining_files(files):
  """Groups the entries of |files| that have a 'h' property by that value.

  |files| is an iterable of (filepath, properties) pairs. Returns a dict
  mapping each 'h' value to the list of (filepath, properties) pairs sharing
  it, in iteration order. Entries without 'h' are left out.
  """
  by_hash = {}
  for entry in files:
    filepath, props = entry
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))
  return by_hash
1635
1636
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies of |isolated_hash| into |target_directory|.

  The directory is created when missing. Once everything is in place, the
  command to run and the directory to run it from are printed.

  Returns 1 when the .isolated files define no command, 0 otherwise.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds') as _prof:
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    print >> sys.stderr, 'No command to run'
    return 1

  with Profiler('GetRest') as _prof:
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(target_directory, settings.relative_cwd,
                              settings.command[:])

    pending_by_hash = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    skip_chmod = sys.platform == 'win32'
    while pending_by_hash:
      digest = no_cache.wait_for(pending_by_hash)
      targets = pending_by_hash.pop(digest)

      for position, (filepath, properties) in enumerate(targets):
        outfile = os.path.join(target_directory, filepath)
        logging.info(no_cache.path(digest))

        # The same content may be mapped at several paths: copy it for all
        # of them but the last one, which takes over the downloaded file.
        is_last = position + 1 == len(targets)
        if is_last:
          os.rename(no_cache.path(digest), outfile)
        else:
          shutil.copyfile(no_cache.path(digest), outfile)

        if 'm' in properties and not skip_chmod:
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in that
          # case.
          os.chmod(outfile, properties['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...' % len(pending_by_hash))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1694
1695
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
    isolated_hash: value checked against RE_IS_SHA1; when it does not match,
        it is treated as the path of a local .isolated file, which is hashed
        and added to the cache first.
    cache_dir: directory of the local cache; the temporary directory is
        created through make_temp_dir() with this value.
    remote: passed to Remote() to fetch the missing items.
    policies: passed to Cache().

  Returns:
    1 when the .isolated files define no command, otherwise the exit code of
    the command.

  The temporary directory is removed in all cases.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # Read through a context manager so the handle is closed right away
          # instead of leaking until garbage collection.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...' % len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      rmtree(outdir)
1760
1761
def main():
  """Parses the command line, configures logging and runs the request.

  With --download the files are only downloaded; otherwise the test is run
  from a temporary directory. Returns the value to use as process exit code.
  """
  disable_buffering()
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')

  download_group = optparse.OptionGroup(parser, 'Download')
  download_group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(download_group)

  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  source_group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  source_group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  parser.add_option_group(source_group)

  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  cache_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  cache_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=2*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  cache_group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(cache_group)

  options, args = parser.parse_args()

  # Each -v makes the console one step more verbose, capped at DEBUG.
  levels = [logging.WARNING, logging.INFO, logging.DEBUG]
  level = levels[min(len(levels) - 1, options.verbose)]
  root_logger = logging.getLogger()

  logging_console = logging.StreamHandler()
  logging_console.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  logging_console.setLevel(level)
  root_logger.addHandler(logging_console)

  # The rotating log file always records everything, whatever -v says.
  logging_rotating_file = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  logging_rotating_file.setLevel(logging.DEBUG)
  logging_rotating_file.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  root_logger.addHandler(logging_rotating_file)

  root_logger.setLevel(logging.DEBUG)

  # Usage errors are logged too, since parser.error() exits the process.
  if bool(options.isolated) == bool(options.hash):
    logging.debug('One and only one of --isolated or --hash is required.')
    parser.error('One and only one of --isolated or --hash is required.')
  if args:
    logging.debug('Unsupported args %s' % ' '.join(args))
    parser.error('Unsupported args %s' % ' '.join(args))

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  source = options.isolated or options.hash
  if options.download:
    return download_test_data(source, options.download, options.remote)
  try:
    return run_tha_test(source, options.cache, options.remote, policies)
  except Exception as e:
    # Make sure any exception is logged.
    logging.exception(e)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001864
1865
if __name__ == '__main__':
  # Ensure that we are always running with the correct encoding.
  fix_default_encoding()
  # Use the value returned by main() as the process exit code.
  sys.exit(main())
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001869 sys.exit(main())