blob: c703ac3c635758590fc0bc833d104c194f966940 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
vadimsh@chromium.org80f73002013-07-12 14:52:44 +000013import functools
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000014import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000015import httplib
maruel@chromium.orgedd25d02013-03-26 14:38:00 +000016import inspect
maruel@chromium.org2b2139a2013-04-30 20:14:58 +000017import itertools
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000018import json
csharp@chromium.orgbfb98742013-03-26 20:28:36 +000019import locale
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000020import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000021import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000022import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000023import optparse
24import os
25import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000026import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000027import re
28import shutil
vadimsh@chromium.org80f73002013-07-12 14:52:44 +000029import socket
30import ssl
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000031import stat
32import subprocess
33import sys
34import tempfile
35import threading
36import time
maruel@chromium.org97cd0be2013-03-13 14:01:36 +000037import traceback
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000038import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000039import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000040import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000041import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000042
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000043from utils import zip_package
44
# Try to import 'upload' module used by AppEngineService for authentication.
# If it is not there, app engine authentication support will be disabled
# ('upload' is then None).
try:
  from third_party import upload
  # Hack out upload logging.info(): swap the 'logging' name seen by the upload
  # module for a dedicated logger whose level is raised to WARNING below.
  upload.logging = logging.getLogger('upload')
  # Mac pylint choke on this line.
  upload.logging.setLevel(logging.WARNING)  # pylint: disable=E1103
except ImportError:
  upload = None
55
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000056
# Absolute path to this file.
THIS_FILE_PATH = os.path.abspath(__file__)

# Directory that contains this file (might be inside zip package).
BASE_DIR = os.path.dirname(THIS_FILE_PATH)

# Directory that contains currently running script file.
MAIN_DIR = os.path.dirname(os.path.abspath(zip_package.get_main_script_path()))

# Types of action accepted by link_file().
HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK, COPY = range(1, 5)

# Matches a hex encoded SHA-1 digest, i.e. 40 hexadecimal characters.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None

# The size of each chunk to read when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# The name of the log to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = 'run_test_cases.log'

# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# Maximum expected delay (in seconds) between successive file fetches
# in run_tha_test. If it takes longer than that, a deadlock might be happening
# and all stack frames for all threads are dumped to log.
DEADLOCK_TIMEOUT = 5 * 60

# The name of the key to store the count of url attempts.
COUNT_KEY = 'UrlOpenAttempt'

# Default maximum number of attempts to open a url before aborting.
URL_OPEN_MAX_ATTEMPTS = 30
# Default timeout (in seconds) when retrying.
URL_OPEN_TIMEOUT = 6*60.

# Read timeout in seconds for downloads from isolate storage. If there's no
# response from the server within this timeout whole download will be aborted.
DOWNLOAD_READ_TIMEOUT = 60

# Global (for now) map: server URL (http://example.com) -> HttpService instance.
# Used by get_http_service to cache HttpService instances.
_http_services = {}
# Guards every access to _http_services.
_http_services_lock = threading.Lock()

# Used by get_flavor(): maps sys.platform values to flavor names. Platforms
# that are not listed here are reported as 'linux'.
FLAVOR_MAPPING = {
  'cygwin': 'win',
  'win32': 'win',
  'darwin': 'mac',
  'sunos5': 'solaris',
  'freebsd7': 'freebsd',
  'freebsd8': 'freebsd',
}
120
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000121
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be loaded."""
125
126
class MappingError(OSError):
  """Raised when the tree of files could not be recreated."""
130
131
class TimeoutError(IOError):
  """Signals a timeout while reading an HTTP response.

  The exception that triggered it, if any, is kept in |inner_exc|; its string
  form is used as the message, 'Timeout' being used when there is none.
  """

  def __init__(self, inner_exc=None):
    message = str(inner_exc) if inner_exc else 'Timeout'
    super(TimeoutError, self).__init__(message)
    self.inner_exc = inner_exc
138
139
def get_as_zip_package(executable=True):
  """Builds a ZipPackage holding this module and all its dependencies.

  When |executable| is True, this file is stored as __main__.py so that the
  zip package is directly executable by python.
  """
  # Building a zip package when running from another zip package is
  # unsupported and probably unneeded.
  this_module = sys.modules[__name__]
  assert not zip_package.is_zipped_module(this_module)
  name_in_archive = '__main__.py' if executable else None
  package = zip_package.ZipPackage(root=BASE_DIR)
  package.add_python_file(THIS_FILE_PATH, name_in_archive)
  for dependency_dir in ('third_party', 'utils'):
    package.add_directory(os.path.join(BASE_DIR, dependency_dir))
  return package
154
155
def get_flavor():
  """Returns the flavor of the running system, 'linux' when it is unknown.

  Copied from gyp/pylib/gyp/common.py.
  """
  try:
    return FLAVOR_MAPPING[sys.platform]
  except KeyError:
    return 'linux'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000159
160
def fix_default_encoding():
  """Forces utf-8 as default encoding and as locale encoding.

  By default the python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Returns False without touching anything when the default encoding already
  is utf-8, True otherwise.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Reloading sys regenerates setdefaultencoding().
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  # pylint: disable=E1101
  sys.setdefaultencoding('utf-8')

  category_names = [name for name in dir(locale) if name.startswith('LC_')]
  for name in category_names:
    category = getattr(locale, name)
    try:
      locale.setlocale(category, '')
    except locale.Error:
      continue
    try:
      language = locale.getlocale(category)[0]
    except (TypeError, ValueError):
      continue
    if not language:
      continue
    try:
      locale.setlocale(category, (language, 'UTF-8'))
    except locale.Error:
      # The locale could not be switched; export the wanted value through the
      # environment variable named after the category instead.
      os.environ[name] = language + '.UTF-8'

  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    pass
  return True
199
200
class Unbuffered(object):
  """Wraps a file object so that it is flushed whenever a newline is written.

  Any attribute other than write() is looked up on the wrapped stream.
  """
  def __init__(self, stream):
    self.stream = stream

  def write(self, data):
    wrapped = self.stream
    wrapped.write(data)
    has_newline = '\n' in data
    if has_newline:
      wrapped.flush()

  def __getattr__(self, attr):
    # Only called for names not found on this object: delegate to the stream.
    return getattr(self.stream, attr)
213
214
def disable_buffering():
  """Makes stdout of this process and of its child processes unbuffered.

  Does nothing when the PYTHONUNBUFFERED environment variable is already set
  to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...; wrap the whole object instead.
  sys.stdout = Unbuffered(sys.stdout)
  os.environ['PYTHONUNBUFFERED'] = 'x'
222
223
def num_processors():
  """Returns the number of processors, or 4 when it cannot be determined.

  multiprocessing.cpu_count() is tried first, then
  os.sysconf('SC_NPROCESSORS_ONLN'). Python on OSX 10.6 raises a
  NotImplementedError exception for the former.

  Only Exception subclasses trigger a fallback, so KeyboardInterrupt and
  SystemExit are not swallowed.
  """
  try:
    # Multiprocessing
    import multiprocessing
    return multiprocessing.cpu_count()
  except Exception:  # pylint: disable=W0703
    pass
  try:
    # Mac OS 10.6
    return int(os.sysconf('SC_NPROCESSORS_ONLN'))  # pylint: disable=E1101
  except Exception:  # pylint: disable=W0703
    # Some of the windows builders seem to get here.
    return 4
240
241
def os_link(source, link_name):
  """Creates the hardlink |link_name| pointing to |source|.

  Adds support for os.link() on Windows, where CreateHardLinkW() is called
  through ctypes. Raises OSError there when the call reports a failure.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  succeeded = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not succeeded:
    raise OSError()
250
251
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and makes the copy readable by everyone."""
  shutil.copy2(infile, outfile)
  read_for_all = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  os.chmod(outfile, os.stat(outfile).st_mode | read_for_all)
258
259
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile|; |action| selects the kind of link.

  - COPY copies the file.
  - SYMLINK creates a symlink, except on Windows where it behaves like
    HARDLINK_WITH_FALLBACK.
  - HARDLINK creates a hardlink and raises MappingError if that fails.
  - HARDLINK_WITH_FALLBACK creates a hardlink and copies the file if that
    fails.

  Raises ValueError on unknown |action| and MappingError when |infile| is
  missing or |outfile| already exists.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  known_actions = (HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK, COPY)
  if action not in known_actions:
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, os.stat(infile).st_size, os.stat(outfile).st_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    # On windows, symlink are converted to hardlink and fails over to copy.
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  try:
    os_link(infile, outfile)
  except OSError as e:
    if action == HARDLINK:
      raise MappingError(
          'Failed to hardlink %s to %s: %s' % (infile, outfile, e))
    # Probably a different file system.
    logging.warning(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
289
290
def _set_write_bit(path, read_only):
  """Sets or resets the write bit on a file or directory.

  When |read_only| is true the permissions are reduced to the owner's read and
  execute bits (mode & 0500); otherwise the owner's write bit is added.

  A symlink is never followed: os.lchmod() is used when the platform has it,
  otherwise the symlink is left untouched.
  """
  original_mode = os.lstat(path).st_mode
  if read_only:
    mode = original_mode & 0o500
  else:
    mode = original_mode | 0o200
  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  # The test must use the mode returned by lstat(): masking with 0500 above
  # drops the file type bits, so S_ISLNK() on the masked value is always False
  # and os.chmod() would then modify the target of the symlink.
  if stat.S_ISLNK(original_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s' % path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
308
309
def make_writable(root, read_only):
  """Toggles the writable bit on everything found below |root|.

  |root| must be an absolute path. Only the entries listed by os.walk() are
  updated, |root| itself is not passed to _set_write_bit().
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    # Files first, then directories.
    for entry in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, entry), read_only)
319
320
def rmtree(root):
  """Wraps shutil.rmtree() to retry automatically on Windows.

  The tree is made writable first. On Windows up to 3 attempts are made,
  sleeping 2, 4 then 6 seconds after each failure.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in range(3):
    try:
      shutil.rmtree(root)
    except WindowsError:  # pylint: disable=E0602
      delay = 2 * (attempt + 1)
      message = (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      sys.stderr.write(message + '\n')
      time.sleep(delay)
    else:
      break
336
337
def is_same_filesystem(path1, path2):
  """Tells whether both paths live on the same filesystem.

  This is required to enable the use of hardlinks. Both paths must be
  absolute.
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    drive_path = r'^[a-zA-Z]\:\\.*'
    assert re.match(drive_path, path1), path1
    assert re.match(drive_path, path2), path2
    if path1[0].lower() != path2[0].lower():
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
354
355
def get_free_space(path):
  """Returns the number of free bytes reported for |path|."""
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
366
367
def make_temp_dir(prefix, root_dir):
  """Creates a temporary directory on the same file system as |root_dir|.

  The default temporary directory is used when it is on the same file system
  as |root_dir|; otherwise the directory is created next to |root_dir|.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
374
375
def load_isolated(content, os_flavor=None):
  """Verifies the .isolated file is valid and returns its decoded json data.

  |content| is the raw text of the .isolated file. |os_flavor| is the value
  the 'os' key must have; get_flavor() is used when it is not specified.

  Returns the decoded dict.

  Raises ConfigError when the content is not valid json, is not a dict,
  contains an unknown key or a value of an unexpected type.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            # Check the type first: re.match() raises TypeError when given
            # something that is not a string.
            if (not isinstance(subsubvalue, basestring) or
                not RE_IS_SHA1.match(subsubvalue)):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            # json decodes integers above sys.maxint as long.
            if not isinstance(subsubvalue, (int, long)):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        # Same as for 'h' above: reject non-strings with ConfigError.
        if (not isinstance(subvalue, basestring) or
            not RE_IS_SHA1.match(subvalue)):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      expected_value = os_flavor or get_flavor()
      if value != expected_value:
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (expected_value, value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
454
455
def fix_python_path(cmd):
  """Returns a copy of |cmd| fixed to call the right python executable.

  A leading 'python' is replaced by sys.executable, and sys.executable is
  prepended when the first item ends with '.py'. |cmd| is not modified.
  """
  fixed = cmd[:]
  program = fixed[0]
  if program == 'python':
    fixed[0] = sys.executable
  elif program.endswith('.py'):
    fixed.insert(0, sys.executable)
  return fixed
464
465
def url_open(url, **kwargs):
  """Attempts to open the given url multiple times.

  The keyword arguments are forwarded to the request() method of the service
  returned by get_http_service() for the host part of |url|.

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns HttpResponse object, where the response may be read from, or None
  if it was unable to connect.
  """
  host, path = split_server_request_url(url)
  return get_http_service(host).request(path, **kwargs)
481
482
def url_read(url, **kwargs):
  """Attempts to open the given url multiple times and reads all its data.

  Accepts the same arguments as url_open().

  Returns all the data read, or None if it was unable to connect or if
  reading the data timed out.
  """
  response = url_open(url, **kwargs)
  if response:
    try:
      return response.read()
    except TimeoutError:
      pass
  return None
497
498
def split_server_request_url(url):
  """Splits the url into scheme+netloc and path+params+query+fragment.

  Returns a tuple (urlhost, urlpath), e.g. ('http://example.com', '/a?b=1').
  """
  parsed = urlparse.urlparse(url)
  scheme, netloc = parsed[0], parsed[1]
  # Rebuild the url without its first two components to get the path part.
  remainder = ['', ''] + list(parsed[2:])
  return '%s://%s' % (scheme, netloc), urlparse.urlunparse(remainder)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000505
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000506
def get_http_service(urlhost):
  """Returns the HttpService instance that sends requests to |urlhost|.

  Instances are cached in _http_services; a new AppEngineService is created
  the first time a given host is requested.
  """
  # Ensure consistency of the cache key.
  cache_key = str(urlhost).lower().rstrip('/')
  with _http_services_lock:
    service = _http_services.get(cache_key)
    if not service:
      service = AppEngineService(cache_key)
      _http_services[cache_key] = service
    return service
519
520
521class HttpService(object):
522 """Base class for a class that provides an API to HTTP based service:
523 - Provides 'request' method.
524 - Supports automatic request retries.
525 - Supports persistent cookies.
526 - Thread safe.
527 """
528
529 # File to use to store all auth cookies.
maruel@chromium.orgbf2a02a2013-07-11 13:27:16 +0000530 COOKIE_FILE = os.path.join(os.path.expanduser('~'), '.isolated_cookies')
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000531
532 # CookieJar reused by all services + lock that protects its instantiation.
533 _cookie_jar = None
534 _cookie_jar_lock = threading.Lock()
535
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000536 def __init__(self, urlhost):
537 self.urlhost = urlhost
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000538 self.cookie_jar = self.load_cookie_jar()
539 self.opener = self.create_url_opener()
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000540
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000541 def authenticate(self): # pylint: disable=R0201
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000542 """Called when HTTP server asks client to authenticate.
543 Can be implemented in subclasses.
544 """
545 return False
546
547 @staticmethod
548 def load_cookie_jar():
549 """Returns global CoookieJar object that stores cookies in the file."""
550 with HttpService._cookie_jar_lock:
551 if HttpService._cookie_jar is not None:
552 return HttpService._cookie_jar
maruel@chromium.orgbf2a02a2013-07-11 13:27:16 +0000553 jar = ThreadSafeCookieJar(HttpService.COOKIE_FILE)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000554 jar.load()
555 HttpService._cookie_jar = jar
556 return jar
557
558 @staticmethod
559 def save_cookie_jar():
560 """Called when cookie jar needs to be flushed to disk."""
561 with HttpService._cookie_jar_lock:
562 if HttpService._cookie_jar is not None:
563 HttpService._cookie_jar.save()
564
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000565 def create_url_opener(self): # pylint: disable=R0201
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000566 """Returns OpenerDirector that will be used when sending requests.
567 Can be reimplemented in subclasses."""
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000568 return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000569
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000570 def request(self, urlpath, data=None, content_type=None, **kwargs):
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000571 """Attempts to open the given url multiple times.
572
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000573 |urlpath| is relative to the server root, i.e. '/some/request?param=1'.
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000574
575 |data| can be either:
576 -None for a GET request
577 -str for pre-encoded data
578 -list for data to be encoded
579 -dict for data to be encoded (COUNT_KEY will be added in this case)
580
581 Returns a file-like object, where the response may be read from, or None
582 if it was unable to connect.
583 """
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000584 assert urlpath and urlpath[0] == '/'
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000585
586 if isinstance(data, dict) and COUNT_KEY in data:
587 logging.error('%s already existed in the data passed into UlrOpen. It '
588 'would be overwritten. Aborting UrlOpen', COUNT_KEY)
589 return None
590
591 method = 'GET' if data is None else 'POST'
592 assert not ((method != 'POST') and content_type), (
593 'Can\'t use content_type on GET')
594
595 def make_request(extra):
596 """Returns a urllib2.Request instance for this specific retry."""
597 if isinstance(data, str) or data is None:
598 payload = data
599 else:
600 if isinstance(data, dict):
601 payload = data.items()
602 else:
603 payload = data[:]
604 payload.extend(extra.iteritems())
605 payload = urllib.urlencode(payload)
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000606 new_url = urlparse.urljoin(self.urlhost, urlpath[1:])
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000607 if isinstance(data, str) or data is None:
608 # In these cases, add the extra parameter to the query part of the url.
609 url_parts = list(urlparse.urlparse(new_url))
610 # Append the query parameter.
611 if url_parts[4] and extra:
612 url_parts[4] += '&'
613 url_parts[4] += urllib.urlencode(extra)
614 new_url = urlparse.urlunparse(url_parts)
615 request = urllib2.Request(new_url, data=payload)
616 if payload is not None:
617 if content_type:
618 request.add_header('Content-Type', content_type)
619 request.add_header('Content-Length', len(payload))
620 return request
621
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000622 return self._retry_loop(make_request, **kwargs)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000623
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000624 def _retry_loop(
625 self,
626 make_request,
627 max_attempts=URL_OPEN_MAX_ATTEMPTS,
628 retry_404=False,
629 retry_50x=True,
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000630 timeout=URL_OPEN_TIMEOUT,
631 read_timeout=None):
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000632 """Runs internal request-retry loop.
633
634 - Optionally retries HTTP 404 and 50x.
635 - Retries up to |max_attempts| times. If None or 0, there's no limit in the
636 number of retries.
637 - Retries up to |timeout| duration in seconds. If None or 0, there's no
638 limit in the time taken to do retries.
639 - If both |max_attempts| and |timeout| are None or 0, this functions retries
640 indefinitely.
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000641
642 If |read_timeout| is not None will configure underlying socket to
643 raise TimeoutError exception whenever there's no response from the server
644 for more than |read_timeout| seconds. It can happen during any read
645 operation so once you pass non-None |read_timeout| be prepared to handle
646 these exceptions in subsequent reads from the stream.
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000647 """
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000648 authenticated = False
649 last_error = None
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000650 attempt = 0
651 start = self._now()
652 for attempt in itertools.count():
653 if max_attempts and attempt >= max_attempts:
654 # Too many attempts.
655 break
656 if timeout and (self._now() - start) >= timeout:
657 # Retried for too long.
658 break
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000659 extra = {COUNT_KEY: attempt} if attempt else {}
660 request = make_request(extra)
661 try:
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000662 url_response = self._url_open(request, timeout=read_timeout)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000663 logging.debug('url_open(%s) succeeded', request.get_full_url())
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000664 return HttpResponse(url_response, request.get_full_url())
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000665 except urllib2.HTTPError as e:
666 # Unauthorized. Ask to authenticate and then try again.
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000667 if e.code in (401, 403):
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000668 # Try to authenticate only once. If it doesn't help, then server does
669 # not support app engine authentication.
vadimsh@chromium.orga1697342013-04-10 22:57:09 +0000670 logging.error(
vadimsh@chromium.orgdde2d732013-04-10 21:12:52 +0000671 'Authentication is required for %s on attempt %d.\n%s',
672 request.get_full_url(), attempt,
673 self._format_exception(e, verbose=True))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000674 if not authenticated and self.authenticate():
675 authenticated = True
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000676 # Do not sleep.
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000677 continue
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000678 # If authentication failed, return.
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000679 logging.error(
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000680 'Unable to authenticate to %s.\n%s',
681 request.get_full_url(), self._format_exception(e, verbose=True))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000682 return None
683
maruel@chromium.orgd58bf5b2013-04-26 17:57:42 +0000684 if ((e.code < 500 and not (retry_404 and e.code == 404)) or
685 (e.code >= 500 and not retry_50x)):
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000686 # This HTTPError means we reached the server and there was a problem
687 # with the request, so don't retry.
688 logging.error(
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000689 'Able to connect to %s but an exception was thrown.\n%s',
690 request.get_full_url(), self._format_exception(e, verbose=True))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000691 return None
692
693 # The HTTPError was due to a server error, so retry the attempt.
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000694 logging.warning('Able to connect to %s on attempt %d.\n%s',
695 request.get_full_url(), attempt,
696 self._format_exception(e))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000697 last_error = e
698
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000699 except (urllib2.URLError, httplib.HTTPException,
700 socket.timeout, ssl.SSLError) as e:
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000701 logging.warning('Unable to open url %s on attempt %d.\n%s',
702 request.get_full_url(), attempt,
703 self._format_exception(e))
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000704 last_error = e
705
706 # Only sleep if we are going to try again.
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000707 if max_attempts and attempt != max_attempts:
708 remaining = None
709 if timeout:
710 remaining = timeout - (self._now() - start)
711 if remaining <= 0:
712 break
713 self.sleep_before_retry(attempt, remaining)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000714
715 logging.error('Unable to open given url, %s, after %d attempts.\n%s',
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000716 request.get_full_url(), max_attempts,
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000717 self._format_exception(last_error, verbose=True))
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000718 return None
719
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000720 def _url_open(self, request, timeout=None):
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000721 """Low level method to execute urllib2.Request's.
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000722
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000723 To be mocked in tests.
724 """
vadimsh@chromium.org80f73002013-07-12 14:52:44 +0000725 if timeout is not None:
726 return self.opener.open(request, timeout=timeout)
727 else:
728 # Leave original default value for |timeout|. It's nontrivial.
729 return self.opener.open(request)
maruel@chromium.orgef333122013-03-12 20:36:40 +0000730
maruel@chromium.org2b2139a2013-04-30 20:14:58 +0000731 @staticmethod
732 def _now():
733 """To be mocked in tests."""
734 return time.time()
735
@staticmethod
def calculate_sleep_before_retry(attempt, max_duration):
  """Returns the number of seconds to wait before retry number |attempt|.

  The delay is an exponential backoff (1.5 ** (attempt - 1)) plus a random
  jitter in [0.0, 1.5), capped at 10 seconds and, when |max_duration| is
  truthy, additionally capped at |max_duration|.
  """
  # Maximum sleeping time. We're hammering a cloud-distributed service, it'll
  # survive.
  longest_sleep = 10.
  # random.random() returns [0.0, 1.0), so early attempts wait only briefly.
  jitter = random.random() * 1.5
  backoff = math.pow(1.5, (attempt - 1))
  delay = jitter + backoff
  assert delay > 0.1
  delay = min(longest_sleep, delay)
  if not max_duration:
    return delay
  return min(max_duration, delay)
749
@classmethod
def sleep_before_retry(cls, attempt, max_duration):
  """Blocks the calling thread for the delay computed by
  cls.calculate_sleep_before_retry(attempt, max_duration).

  To be mocked in tests.
  """
  delay = cls.calculate_sleep_before_retry(attempt, max_duration)
  time.sleep(delay)
maruel@chromium.orgef333122013-03-12 20:36:40 +0000757
vadimsh@chromium.org2edbe3f2013-04-05 19:44:54 +0000758 @staticmethod
759 def _format_exception(exc, verbose=False):
760 """Given an instance of some exception raised by urlopen returns human
761 readable piece of text with detailed information about the error.
762 """
763 out = ['Exception: %s' % (exc,)]
764 if verbose:
765 if isinstance(exc, urllib2.HTTPError):
766 out.append('-' * 10)
767 if exc.hdrs:
768 for header, value in exc.hdrs.items():
769 if not header.startswith('x-'):
770 out.append('%s: %s' % (header.capitalize(), value))
771 out.append('')
772 out.append(exc.read() or '<empty body>')
773 out.append('-' * 10)
774 return '\n'.join(out)
775
maruel@chromium.orgef333122013-03-12 20:36:40 +0000776
class HttpResponse(object):
  """Response from HttpService.

  Wraps the underlying response object and keeps count of the bytes read so
  far, so that read timeouts can be reported with some context.
  """

  def __init__(self, url_response, url):
    # Number of bytes returned by read() so far.
    self._read = 0
    self._url = url
    self._url_response = url_response

  @property
  def content_length(self):
    """Value of the Content-Length header as an int, None if it is absent."""
    raw = self._url_response.headers.get('Content-Length')
    if raw is None:
      return None
    return int(raw)

  def read(self, size=None):
    """Reads up to |size| bytes from the stream and returns them.

    If |size| is None reads all available bytes.

    Raises TimeoutError on read timeout.
    """
    try:
      chunk = self._url_response.read(size)
    except (socket.timeout, ssl.SSLError) as err:
      logging.error('Timeout while reading from %s, read %d of %s: %s',
                    self._url, self._read, self.content_length, err)
      raise TimeoutError(err)
    self._read += len(chunk)
    return chunk
806
807
class AppEngineService(HttpService):
  """HttpService that knows how to authenticate against an app engine based
  service.
  """

  # This lock ensures that user won't be confused with multiple concurrent
  # login prompts.
  _auth_lock = threading.Lock()

  def __init__(self, urlhost, email=None, password=None):
    super(AppEngineService, self).__init__(urlhost)
    self._keyring = None
    self.email = email
    self.password = password

  def authenticate(self):
    """Authenticates in the app engine application.

    Returns True on success, False if the 'upload' module is unavailable.
    """
    if not upload:
      logging.error(
          "'upload' module is missing, app engine authentication is disabled.")
      return False

    # Captured as locals so the nested class can reach them: inside its
    # methods |self| is the RPC server, not this service.
    cookie_jar = self.cookie_jar
    save_cookie_jar = self.save_cookie_jar

    # RPC server that uses AuthenticationSupport's cookie jar.
    class AuthServer(upload.AbstractRpcServer):
      def _GetOpener(self):
        # Authentication code needs to know about 302 response.
        # So make OpenerDirector without HTTPRedirectHandler.
        handlers = (
            urllib2.ProxyHandler(),
            urllib2.UnknownHandler(),
            urllib2.HTTPHandler(),
            urllib2.HTTPDefaultErrorHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPErrorProcessor(),
            urllib2.HTTPCookieProcessor(cookie_jar),
        )
        director = urllib2.OpenerDirector()
        for handler in handlers:
          director.add_handler(handler)
        return director

      def PerformAuthentication(self):
        self._Authenticate()
        save_cookie_jar()
        return self.authenticated

    with AppEngineService._auth_lock:
      server = AuthServer(self.urlhost, self.get_credentials)
      return server.PerformAuthentication()

  def get_credentials(self):
    """Called during authentication process to get the credentials.

    May be called multiple times if authentication fails.
    Returns tuple (email, password).
    """
    # 'authenticate' calls this only if 'upload' is present.
    # Ensure other callers (if any) fail non-cryptically if 'upload' is missing.
    assert upload, '\'upload\' module is required for this to work'
    if self.email and self.password:
      return (self.email, self.password)
    if not self._keyring:
      # Created lazily on first use and reused by later calls.
      self._keyring = upload.KeyringCreds(
          self.urlhost, self.urlhost, self.email)
    return self._keyring.GetUserCredentials()
870
871
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save."""

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists.

    If the file does not exist, an empty one is created. In both cases the
    file mode is then set to 0600. Load, creation and chmod failures are logged
    instead of being raised.
    """
    filename = os.path.expanduser(filename or self.filename)
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(self, filename,
                                          ignore_discard,
                                          ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError) as e:
          # Best effort: e.g. the empty file created below is not a valid
          # cookie file until the first save(). Keep going without cookies.
          logging.debug('Failed to load cookies from %s: %s', filename, e)
      else:
        try:
          fd = os.open(filename, os.O_CREAT, 0o600)
          os.close(fd)
        except OSError:
          logging.error('Failed to create %s', filename)
      try:
        os.chmod(filename, 0o600)
      except OSError:
        logging.error('Failed to fix mode for %s', filename)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it.

    The path is expanded the same way load() does it. I/O failures are logged
    instead of being raised.
    """
    target = filename or self.filename
    if target:
      target = os.path.expanduser(target)
    logging.debug('Saving cookies to %s', target)
    with self._cookies_lock:
      try:
        cookielib.MozillaCookieJar.save(self, target,
                                        ignore_discard,
                                        ignore_expires)
      except (IOError, OSError):
        # On python 2 a failing open() raises IOError, which is not an OSError.
        logging.error('Failed to save %s', target)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000908
909
class ThreadPoolError(Exception):
  """Root of the exception hierarchy raised by ThreadPool."""
  pass


class ThreadPoolEmpty(ThreadPoolError):
  """A task result was requested from a pool that has no pending tasks."""
  pass


class ThreadPoolClosed(ThreadPoolError):
  """An operation was attempted on a thread pool that is already closed."""
  pass
920
921
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000922class ThreadPool(object):
923 """Implements a multithreaded worker pool oriented for mapping jobs with
924 thread-local result storage.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000925
926 Arguments:
927 - initial_threads: Number of threads to start immediately. Can be 0 if it is
928 uncertain that threads will be needed.
929 - max_threads: Maximum number of threads that will be started when all the
930 threads are busy working. Often the number of CPU cores.
931 - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
932 queue. A non-zero value may make add_task() blocking.
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000933 - prefix: Prefix to use for thread names. Pool's threads will be
934 named '<prefix>-<thread index>'.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000935 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000936 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000937
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000938 def __init__(self, initial_threads, max_threads, queue_size, prefix=None):
939 prefix = prefix or 'tp-0x%0x' % id(self)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000940 logging.debug(
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000941 'New ThreadPool(%d, %d, %d): %s', initial_threads, max_threads,
942 queue_size, prefix)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000943 assert initial_threads <= max_threads
944 # Update this check once 256 cores CPU are common.
945 assert max_threads <= 256
946
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000947 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000948 self._max_threads = max_threads
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000949 self._prefix = prefix
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000950
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +0000951 # Used to assign indexes to tasks.
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000952 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000953 self._num_of_added_tasks = 0
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +0000954
955 # Lock that protected everything below (including conditional variable).
956 self._lock = threading.Lock()
957
958 # Condition 'bool(_outputs) or bool(_exceptions) or _pending_count == 0'.
959 self._outputs_exceptions_cond = threading.Condition(self._lock)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000960 self._outputs = []
961 self._exceptions = []
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000962
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +0000963 # Number of pending tasks (queued or being processed now).
964 self._pending_count = 0
965
966 # List of threads.
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000967 self._workers = []
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +0000968 # Number of threads that are waiting for new tasks.
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000969 self._ready = 0
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000970 # Number of threads already added to _workers, but not yet running the loop.
971 self._starting = 0
972 # True if close was called. Forbids adding new tasks.
973 self._is_closed = False
974
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000975 for _ in range(initial_threads):
976 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000977
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000978 def _add_worker(self):
979 """Adds one worker thread if there isn't too many. Thread-safe."""
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +0000980 with self._lock:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000981 if len(self._workers) >= self._max_threads or self._is_closed:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000982 return False
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000983 worker = threading.Thread(
984 name='%s-%d' % (self._prefix, len(self._workers)), target=self._run)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000985 self._workers.append(worker)
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000986 self._starting += 1
987 logging.debug('Starting worker thread %s', worker.name)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000988 worker.daemon = True
989 worker.start()
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000990 return True
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000991
maruel@chromium.org831958f2013-01-22 15:01:46 +0000992 def add_task(self, priority, func, *args, **kwargs):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000993 """Adds a task, a function to be executed by a worker.
994
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000995 |priority| can adjust the priority of the task versus others. Lower priority
maruel@chromium.org831958f2013-01-22 15:01:46 +0000996 takes precedence.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000997
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000998 |func| can either return a return value to be added to the output list or
999 be a generator which can emit multiple values.
1000
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001001 Returns the index of the item added, e.g. the total number of enqueued items
1002 up to now.
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001003 """
maruel@chromium.org831958f2013-01-22 15:01:46 +00001004 assert isinstance(priority, int)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001005 assert callable(func)
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001006 with self._lock:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001007 if self._is_closed:
1008 raise ThreadPoolClosed('Can not add a task to a closed ThreadPool')
1009 start_new_worker = (
1010 # Pending task count plus new task > number of available workers.
1011 self.tasks.qsize() + 1 > self._ready + self._starting and
1012 # Enough slots.
1013 len(self._workers) < self._max_threads
1014 )
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001015 self._pending_count += 1
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001016 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001017 self._num_of_added_tasks += 1
1018 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001019 self.tasks.put((priority, index, func, args, kwargs))
1020 if start_new_worker:
1021 self._add_worker()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001022 return index
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001023
1024 def _run(self):
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001025 """Worker thread loop. Runs until a None task is queued."""
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001026 # Thread has started, adjust counters.
1027 with self._lock:
1028 self._starting -= 1
1029 self._ready += 1
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001030 while True:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001031 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001032 task = self.tasks.get()
1033 finally:
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001034 with self._lock:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001035 self._ready -= 1
1036 try:
1037 if task is None:
1038 # We're done.
1039 return
1040 _priority, _index, func, args, kwargs = task
maruel@chromium.orgedd25d02013-03-26 14:38:00 +00001041 if inspect.isgeneratorfunction(func):
1042 for out in func(*args, **kwargs):
1043 self._output_append(out)
1044 else:
1045 out = func(*args, **kwargs)
1046 self._output_append(out)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001047 except Exception as e:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001048 logging.warning('Caught exception: %s', e)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001049 exc_info = sys.exc_info()
maruel@chromium.org97cd0be2013-03-13 14:01:36 +00001050 logging.info(''.join(traceback.format_tb(exc_info[2])))
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001051 with self._outputs_exceptions_cond:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001052 self._exceptions.append(exc_info)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001053 self._outputs_exceptions_cond.notifyAll()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001054 finally:
csharp@chromium.org60991182013-03-18 13:44:17 +00001055 try:
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001056 # Mark thread as ready again, mark task as processed. Do it before
1057 # waking up threads waiting on self.tasks.join(). Otherwise they might
1058 # find ThreadPool still 'busy' and perform unnecessary wait on CV.
1059 with self._outputs_exceptions_cond:
1060 self._ready += 1
1061 self._pending_count -= 1
1062 if self._pending_count == 0:
1063 self._outputs_exceptions_cond.notifyAll()
csharp@chromium.org60991182013-03-18 13:44:17 +00001064 self.tasks.task_done()
1065 except Exception as e:
1066 # We need to catch and log this error here because this is the root
1067 # function for the thread, nothing higher will catch the error.
1068 logging.exception('Caught exception while marking task as done: %s',
1069 e)
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001070
maruel@chromium.orgedd25d02013-03-26 14:38:00 +00001071 def _output_append(self, out):
1072 if out is not None:
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001073 with self._outputs_exceptions_cond:
maruel@chromium.orgedd25d02013-03-26 14:38:00 +00001074 self._outputs.append(out)
1075 self._outputs_exceptions_cond.notifyAll()
maruel@chromium.orgedd25d02013-03-26 14:38:00 +00001076
maruel@chromium.orgeb281652012-11-08 21:10:23 +00001077 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001078 """Extracts all the results from each threads unordered.
1079
1080 Call repeatedly to extract all the exceptions if desired.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001081
1082 Note: will wait for all work items to be done before returning an exception.
1083 To get an exception early, use get_one_result().
maruel@chromium.org5a1446a2013-01-17 15:13:27 +00001084 """
1085 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +00001086 self.tasks.join()
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001087 with self._outputs_exceptions_cond:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001088 if self._exceptions:
1089 e = self._exceptions.pop(0)
1090 raise e[0], e[1], e[2]
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +00001091 out = self._outputs
1092 self._outputs = []
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001093 return out
1094
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001095 def get_one_result(self):
1096 """Returns the next item that was generated or raises an exception if one
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001097 occurred.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001098
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001099 Raises:
1100 ThreadPoolEmpty - no results available.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001101 """
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001102 # Get first available result.
1103 for result in self.iter_results():
1104 return result
1105 # No results -> tasks queue is empty.
1106 raise ThreadPoolEmpty('Task queue is empty')
1107
1108 def iter_results(self):
1109 """Yields results as they appear until all tasks are processed."""
1110 while True:
1111 # Check for pending results.
1112 result = None
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001113 with self._outputs_exceptions_cond:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001114 if self._exceptions:
1115 e = self._exceptions.pop(0)
1116 raise e[0], e[1], e[2]
1117 if self._outputs:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001118 # Remember the result to yield it outside of the lock.
1119 result = self._outputs.pop(0)
1120 else:
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001121 # No pending tasks -> all tasks are done.
1122 if not self._pending_count:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001123 return
1124 # Some task is queued, wait for its result to appear.
1125 # Use non-None timeout so that process reacts to Ctrl+C and other
1126 # signals, see http://bugs.python.org/issue8844.
1127 self._outputs_exceptions_cond.wait(timeout=5)
1128 continue
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001129 yield result
1130
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001131 def close(self):
1132 """Closes all the threads."""
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001133 # Ensure no new threads can be started, self._workers is effectively
1134 # a constant after that and can be accessed outside the lock.
vadimsh@chromium.org6e2eca62013-07-10 13:47:36 +00001135 with self._lock:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001136 if self._is_closed:
1137 raise ThreadPoolClosed('Can not close already closed ThreadPool')
1138 self._is_closed = True
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001139 for _ in range(len(self._workers)):
1140 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +00001141 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001142 for t in self._workers:
1143 t.join()
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001144 logging.debug(
1145 'Thread pool \'%s\' closed: spawned %d threads total',
1146 self._prefix, len(self._workers))
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001147
1148 def __enter__(self):
1149 """Enables 'with' statement."""
1150 return self
1151
maruel@chromium.org97cd0be2013-03-13 14:01:36 +00001152 def __exit__(self, _exc_type, _exc_value, _traceback):
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001153 """Enables 'with' statement."""
1154 self.close()
1155
1156
def valid_file(filepath, size):
  """Tells whether the file at |filepath| looks valid.

  Currently only the size is checked: returns True when |size| is
  UNKNOWN_FILE_SIZE or equals the size on disk, otherwise logs a warning and
  returns False.
  """
  if size != UNKNOWN_FILE_SIZE:
    size_on_disk = os.stat(filepath).st_size
    if size != size_on_disk:
      logging.warning(
          'Found invalid item %s; %d != %d',
          os.path.basename(filepath), size_on_disk, size)
      return False
  return True
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001169
1170
class Profiler(object):
  """Context manager that logs the wall-clock time spent inside its block."""

  def __init__(self, name):
    # Set by __enter__; None until the block is entered.
    self.start_time = None
    self.name = name

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
1184
1185
class DeadlockDetector(object):
  """Context manager that can detect deadlocks.

  It will dump stack frames of all running threads if its 'ping' method isn't
  called in time.

  Usage:
    with DeadlockDetector(timeout=60) as detector:
      for item in some_work():
        ...
        detector.ping()
        ...

  Arguments:
    timeout - maximum allowed time between calls to 'ping'.
  """

  def __init__(self, timeout):
    self.timeout = timeout
    self._thread = None
    # Thread stop condition. Also lock for shared variables below.
    self._stop_cv = threading.Condition()
    self._stop_flag = False
    # Time when 'ping' was called last time.
    self._last_ping = None
    # True if pings are coming on time.
    self._alive = True

  def __enter__(self):
    """Starts internal watcher thread."""
    assert self._thread is None
    # Initializes _last_ping so the watcher has a deadline to start from.
    self.ping()
    self._thread = threading.Thread(name='deadlock-detector', target=self._run)
    self._thread.daemon = True
    self._thread.start()
    return self

  def __exit__(self, *_args):
    """Stops internal watcher thread."""
    assert self._thread is not None
    with self._stop_cv:
      self._stop_flag = True
      self._stop_cv.notify()
    self._thread.join()
    self._thread = None
    # Reset so the detector can be entered again.
    self._stop_flag = False

  def ping(self):
    """Notify detector that main thread is still running.

    Should be called periodically to inform the detector that everything is
    running as it should.
    """
    with self._stop_cv:
      self._last_ping = time.time()
      self._alive = True

  def _run(self):
    """Loop that watches for pings and dumps threads state if ping is late."""
    with self._stop_cv:
      while not self._stop_flag:
        # Skipped deadline? Dump threads and switch to 'not alive' state.
        if self._alive and time.time() > self._last_ping + self.timeout:
          self.dump_threads(time.time() - self._last_ping, True)
          self._alive = False

        # Pings are on time?
        if self._alive:
          # Wait until the moment we need to dump stack traces.
          # Most probably some other thread will call 'ping' to move deadline
          # further in time. We don't bother to wake up after each 'ping',
          # only right before initial expected deadline.
          self._stop_cv.wait(self._last_ping + self.timeout - time.time())
        else:
          # Skipped some pings previously. Just periodically silently check
          # for new pings with some arbitrary frequency.
          self._stop_cv.wait(self.timeout * 0.1)

  @staticmethod
  def dump_threads(timeout=None, skip_current_thread=False):
    """Dumps stack frames of all running threads.

    Arguments:
      timeout - if not None, number of seconds since the last ping, included
                in the report.
      skip_current_thread - if True, the calling thread is left out of the
                            report.
    """
    all_threads = threading.enumerate()
    current_thread_id = threading.current_thread().ident

    # Collect tracebacks: thread name -> traceback string.
    tracebacks = {}

    # pylint: disable=W0212
    for thread_id, frame in sys._current_frames().items():
      # Don't dump deadlock detector's own thread, it's boring.
      if thread_id == current_thread_id and skip_current_thread:
        continue

      # Try to get more informative symbolic thread name.
      name = 'untitled'
      for thread in all_threads:
        if thread.ident == thread_id:
          name = thread.name
          break
      name += ' #%d' % (thread_id,)
      tracebacks[name] = ''.join(traceback.format_stack(frame))

    # Function to print a message. Makes it easier to change output destination.
    def output(msg):
      logging.warning(msg.rstrip())

    # Print tracebacks, sorting them by thread name. That way a thread pool's
    # threads will be printed as one group.
    output('=============== Potential deadlock detected ===============')
    if timeout is not None:
      output('No pings in last %d sec.' % (timeout,))
    output('Dumping stack frames for all threads:')
    for name in sorted(tracebacks):
      output('Traceback for \'%s\':\n%s' % (name, tracebacks[name]))
    output('===========================================================')
vadimsh@chromium.org5db0f4f2013-07-04 13:57:02 +00001301
1302
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priority of items is equal, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The low 8 bits are reserved for the internal retry counter, see
  # _task_executer().
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    """|destination_root| is either an http(s) url or a file system path."""
    # Function to fetch a remote object or upload to a remote location.
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = ThreadPool(self.INITIAL_WORKERS, self.MAX_WORKERS, 0, 'remote')

  def join(self):
    """Blocks until the queue is empty."""
    return self._pool.join()

  def close(self):
    """Terminates all worker threads."""
    self._pool.close()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first.

    Thread-safe.
    """
    # Callers may only use the public priority bits; the lower bits are used to
    # count the retries.
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Enqueues the task without checking the internal priority bits."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns one result from the underlying thread pool."""
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success and None when the item was queued again for a
    retry. Re-raises the IOError once the retries are exhausted.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Remove unfinished download.
      if os.path.exists(dest):
        os.remove(dest)
      # Retry a few times, lowering the priority. The retry count lives in the
      # internal priority bits.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a object to retrieve objects from a remote.

    The returned callable takes (item, dest).
    """
    if re.match(r'^https?://.+$', file_or_url):
      return functools.partial(self._download_file, file_or_url)
    else:
      return functools.partial(self._copy_file, file_or_url)

  @staticmethod
  def _download_file(base_url, item, dest):
    """Downloads the zlib-compressed |item| and writes it decompressed to |dest|.

    Every failure is reported as an IOError so _task_executer() can retry.
    """
    # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
    # easy.
    try:
      zipped_source = base_url + item
      logging.debug('download_file(%s)', zipped_source)

      # Because the app engine DB is only eventually consistent, retry
      # 404 errors because the file might just not be visible yet (even
      # though it has been uploaded).
      connection = url_open(zipped_source, retry_404=True,
                            read_timeout=DOWNLOAD_READ_TIMEOUT)
      if not connection:
        raise IOError('Unable to open connection to %s' % zipped_source)

      content_length = connection.content_length
      decompressor = zlib.decompressobj()
      size = 0
      with open(dest, 'wb') as f:
        while True:
          chunk = connection.read(ZIPPED_FILE_CHUNK)
          if not chunk:
            break
          size += len(chunk)
          f.write(decompressor.decompress(chunk))
        # Write whatever the decompressor still holds instead of asserting
        # that it is empty: an assert is stripped with -O and the remaining
        # data would then be silently dropped.
        f.write(decompressor.flush())
    except IOError as e:
      logging.error('Failed to download %s at %s.\n%s', item, dest, e)
      raise
    except httplib.HTTPException as e:
      msg = 'HTTPException while retrieving %s at %s.\n%s' % (item, dest, e)
      logging.error(msg)
      raise IOError(msg)
    except zlib.error as e:
      msg = 'Corrupted zlib for item %s. Processed %d of %s bytes.\n%s' % (
          item, size, content_length, e)
      logging.error(msg)

      # Testing seems to show that if a few machines are trying to download
      # the same blob, they can cause each other to fail. So if we hit a
      # zip error, this is the most likely cause (it only downloads some of
      # the data). Randomly sleep for between 5 and 25 seconds to try and
      # spread out the downloads.
      # TODO(csharp): Switch from blobstorage to cloud storage and see if
      # that solves the issue.
      sleep_duration = (random.random() * 20) + 5
      time.sleep(sleep_duration)

      raise IOError(msg)

  @staticmethod
  def _copy_file(base_path, item, dest):
    """Copies |item| found in |base_path| to |dest|."""
    source = os.path.join(base_path, item)
    if source == dest:
      logging.info('Source and destination are the same, no action required')
      return
    logging.debug('copy_file(%s, %s)', source, dest)
    shutil.copy(source, dest)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001447
1448
class CachePolicies(object):
  """Plain container for the cache retention limits."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
1463
1464
class NoCache(object):
  """Drop-in replacement for the Cache class that keeps no cache.

  Every file is downloaded straight into the target directory; the caller then
  moves it to where it is needed.
  """

  def __init__(self, target_directory, remote):
    self.remote = remote
    self.target_directory = target_directory

  def retrieve(self, priority, item, size):
    """Requests |item| from the remote and blocks until one result is out."""
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Picks the first key of |items|, downloading it if it is not on disk.

    Returns the key that was picked.
    """
    item = next(iter(items))
    destination = self.path(item)
    if os.path.exists(destination):
      return item

    self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
    downloaded = self.remote.get_one_result()
    assert downloaded == item
    return item

  def path(self, item):
    """Returns where |item| lives inside the target directory."""
    return os.path.join(self.target_directory, item)
1493
1494
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    self._state_need_to_be_saved = False
    # A lookup map to speed up searching: filename -> index in self.state.
    self._lookup = {}
    self._lookup_is_stale = True

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    with Profiler('Setup'):
      if not os.path.isdir(self.cache_dir):
        os.makedirs(self.cache_dir)
      if os.path.isfile(self.state_file):
        try:
          # Use a context manager so the handle is closed even when the json
          # is broken.
          with open(self.state_file, 'r') as f:
            self.state = json.load(f)
        except (IOError, ValueError) as e:
          # Too bad. The file will be overwritten and the cache cleared.
          logging.error(
              'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
          self._state_need_to_be_saved = True
        if (not isinstance(self.state, list) or
            not all(
              isinstance(i, (list, tuple)) and len(i) == 2
              for i in self.state)):
          # Discard.
          self._state_need_to_be_saved = True
          self.state = []

      # Ensure that all files listed in the state still exist and add new ones.
      previous = set(filename for filename, _ in self.state)
      if len(previous) != len(self.state):
        logging.warning('Cache state is corrupted, found duplicate files')
        self._state_need_to_be_saved = True
        self.state = []
        # The state was dropped, so nothing is tracked anymore. Forget the
        # previous names too, otherwise the files still on disk would be
        # skipped below and stay untracked for the whole run.
        previous = set()

      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        if not RE_IS_SHA1.match(filename):
          logging.warning('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
          continue
        # Insert as the oldest file. It will be deleted eventually if not
        # accessed.
        self._add(filename, False)
        logging.warning('Add unknown file %s to cache', filename)
        added += 1

      if added:
        logging.warning('Added back %d unknown files', added)
      if previous:
        # What is left in |previous| was listed in the state but is not on
        # disk anymore.
        logging.warning('Removed %d lost files', len(previous))
        # Set explicitly in case self._add() wasn't called.
        self._state_need_to_be_saved = True
        # Filter out entries that were not found while keeping the previous
        # order.
        self.state = [
          (filename, size) for filename, size in self.state
          if filename not in previous
        ]
      self.trim()

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Trims the cache one last time and logs the profiling values."""
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%5d (%8dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info(' %8dkb free', self._free_disk / 1024)

  def remove_file_at_index(self, index):
    """Removes the file at the given index.

    A failure to delete the file on disk is logged and otherwise ignored.
    """
    try:
      self._state_need_to_be_saved = True
      filename, size = self.state.pop(index)
      # If the lookup was already stale, its possible the filename was not
      # present yet.
      self._lookup_is_stale = True
      self._lookup.pop(filename, None)
      self._removed.append(size)
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)

  def remove_lru_file(self):
    """Removes the least recently used file."""
    self.remove_file_at_index(0)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    # Ensure maximum cache size. Keep a running total instead of summing the
    # whole state again after every removal.
    max_cache_size = self.policies.max_cache_size
    if max_cache_size:
      total = sum(size for _, size in self.state)
      while self.state and total > max_cache_size:
        total -= self.state[0][1]
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    trimmed_due_to_space = False
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      trimmed_due_to_space = True
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)
    if trimmed_due_to_space:
      total = sum(size for _, size in self.state)
      # The loop above may have removed every file, in which case there is
      # nothing to divide by.
      percent = 0.
      if total:
        percent = 100. * self.policies.max_cache_size / float(total)
      logging.warning(
          'Trimmed due to not enough free disk space: %.1fkb free, %.1fkb '
          'cache (%.1f%% of its maximum capacity)',
          self._free_disk / 1024.,
          total / 1024.,
          percent,
          )
    self.save()

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verify that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.
    """
    assert not '/' in item
    path = self.path(item)
    self._update_lookup()
    index = self._lookup.get(item)

    if index is not None:
      if not valid_file(self.path(item), size):
        self.remove_file_at_index(index)
        index = None
      else:
        assert index < len(self.state)
        # Was already in cache. Update its LRU value by putting it at the end.
        self._state_need_to_be_saved = True
        self._lookup_is_stale = True
        self.state.append(self.state.pop(index))

    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      # TODO(maruel): It should look at the free disk space, the current cache
      # size and the size of the new item on every new item:
      # - Trim the cache as more entries are listed when free disk space is low,
      #   otherwise if the amount of data downloaded during the run > free disk
      #   space, it'll crash.
      # - Make sure there's enough free disk space to fit all dependencies of
      #   this run! If not, abort early.
      self.remote.add_item(priority, item, path, size)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    self._update_lookup()
    if not obj in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK_WITH_FALLBACK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering, only if it changed since the last save."""
    if self._state_need_to_be_saved:
      # Close the file deterministically so the state is flushed to disk. The
      # compact json has no raw newline, so text mode writes the same bytes.
      with open(self.state_file, 'w') as f:
        json.dump(self.state, f, separators=(',',':'))
      self._state_need_to_be_saved = False

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    self._update_lookup()
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_one_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    self._state_need_to_be_saved = True
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      # Every index shifts by one, so the lookup can't be patched cheaply.
      self._lookup_is_stale = True
      self.state.insert(0, (item, size))

  def _update_lookup(self):
    """Rebuilds the filename -> index map if it was flagged as stale."""
    if self._lookup_is_stale:
      self._lookup = dict(
          (filename, index) for index, (filename, _) in enumerate(self.state))
      self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001755
1756
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)', obj_hash)
    self.obj_hash = obj_hash
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.
    """
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included_hashes = self.data.get('includes', [])
    self.children = [IsolatedFile(h) for h in included_hashes]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)', self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s', filepath)
        cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1811
1812
class Settings(object):
  """Results of a completely parsed .isolated file."""

  def __init__(self):
    self.command = []
    # Maps each file path to its properties, filled in by
    # IsolatedFile.fetch_files().
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Raises ConfigError if a .isolated file is included more than once.
    """
    self.root = IsolatedFile(root_isolated_hash)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

    retrieve(self.root)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the handle right away instead of leaving it to the garbage
      # collector.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(cache, self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False

  def _traverse_tree(self, cache, node):
    """Fetches the files of every node that is allowed to, left side first.

    At most one additional child per node is marked as fetchable on each call.
    """
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(cache, node)
      will_break = False
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(cache, i)

  def _update_self(self, cache, node):
    """Fetches the files of |node| and grabs the properties not set yet."""
    node.fetch_files(cache, self.files)
    # Grabs properties. The first node to define a property wins.
    if not self.command and node.data.get('command'):
      self.command = node.data['command']
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1904
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001905
def create_directories(base_directory, files):
  """Creates, under |base_directory|, every directory the given relative file
  paths need.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect the parent directory of each file along with all its ancestors.
  needed = set()
  for filepath in files:
    parent = os.path.dirname(filepath)
    while parent:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # Sorting guarantees a directory is created before anything inside it.
  for relative_dir in sorted(needed):
    os.mkdir(os.path.join(base_directory, relative_dir))
1918
1919
def create_links(base_directory, files):
  """Creates a symlink for each (filepath, properties) pair that has a 'l'
  property. Other entries are left alone.
  """
  on_windows = sys.platform == 'win32'
  for filepath, properties in files:
    if 'l' not in properties:
      continue
    if on_windows:
      # TODO(maruel): Create junctions or empty text files similar to what
      # cygwin do?
      logging.warning('Ignoring symlink %s', filepath)
      continue
    outfile = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'link' property should
    # never be specified for windows .isolated file.
    os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    if 'm' not in properties:
      continue
    # os.lchmod is not available on every platform; skip the mode when it is
    # missing.
    lchmod = getattr(os, 'lchmod', None)
    if lchmod:
      lchmod(outfile, properties['m'])
1938
1939
def setup_commands(base_directory, cwd, cmd):
  """Returns the (working directory, command) pair needed to run the test.

  The working directory is created if missing. Note that |cmd| has its first
  item modified in place.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  work_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(work_dir):
    os.makedirs(work_dir)

  # Ensure paths are correctly separated on windows.
  cmd[0] = cmd[0].replace('/', os.path.sep)
  return work_dir, fix_python_path(cmd)
1954
1955
def generate_remaining_files(files):
  """Groups the (filepath, properties) pairs that have a 'h' property by that
  value.

  Returns a dict mapping each 'h' value to the list of pairs using it, in the
  order they were given. Pairs without 'h' are skipped.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))
  return by_hash
1964
1965
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies to the given directory.

  Returns 0 on success, or 1 when the .isolated files define no command.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds'):
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    sys.stderr.write('No command to run\n')
    return 1

  with Profiler('GetRest'):
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(target_directory, settings.relative_cwd,
                              settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      obj = no_cache.wait_for(remaining)
      destinations = remaining.pop(obj)
      downloaded = no_cache.path(obj)
      last_index = len(destinations) - 1

      for index, (filepath, properties) in enumerate(destinations):
        outfile = os.path.join(target_directory, filepath)
        logging.info(downloaded)

        # The same content may be mapped at several paths: copy it for all
        # but the last one, which takes over the downloaded file itself.
        if index == last_index:
          os.rename(downloaded, outfile)
        else:
          shutil.copyfile(downloaded, outfile)

        if 'm' in properties and sys.platform != 'win32':
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in that
          # case.
          os.chmod(outfile, properties['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        msg = '%d files remaining...' % len(remaining)
        print(msg)
        logging.info(msg)
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
2025
2026
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
    isolated_hash: either a value matching RE_IS_SHA1, or the path of a local
                   .isolated file, which is then hashed and added to the cache.
    cache_dir: directory handed to Cache(); the temporary run directory is
               also created through make_temp_dir() with this value.
    remote: value handed to Remote() to describe where items come from.
    policies: cache policies handed to Cache().

  Returns:
    1 when the loaded settings contain no command, otherwise the value
    returned by subprocess.call() for the command.

  Raises:
    OSError: re-raised, after printing a message to stderr, when running the
             command fails with it.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # Read through a context manager so the file handle is closed
          # deterministically instead of being left to garbage collection.
          with open(isolated_hash, 'rb') as isolated_file:
            h = hashlib.sha1(isolated_file.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        sys.stderr.write('No command to run\n')
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        with DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
          while remaining:
            # Signal progress on every iteration of the wait loop.
            detector.ping()
            obj = cache.wait_for(remaining)
            for filepath, properties in remaining.pop(obj):
              outfile = os.path.join(outdir, filepath)
              link_file(outfile, cache.path(obj), HARDLINK)
              if 'm' in properties:
                # It's not set on Windows.
                os.chmod(outfile, properties['m'])

            if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
              msg = '%d files remaining...' % len(remaining)
              print(msg)
              logging.info(msg)
              last_update = time.time()

      if settings.read_only:
        logging.info('Making files read only')
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s', cmd, cwd)

      # TODO(csharp): This should be specified somewhere else.
      # TODO(vadimsh): Pass it via 'env_vars' in manifest.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE',
                     os.path.join(MAIN_DIR, RUN_TEST_CASES_LOG))
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        sys.stderr.write('Failed to run %s; cwd=%s\n' % (cmd, cwd))
        raise
    finally:
      # The temporary tree is removed on every exit path, including errors.
      rmtree(outdir)
2098
2099
class OptionParserWithLogging(optparse.OptionParser):
  """optparse.OptionParser that adds --verbose and --log_file, and configures
  the root logger from them when arguments are parsed.
  """
  def __init__(self, verbose=0, log_file=None, **kwargs):
    """Registers the logging options.

    Arguments:
      verbose: default value of the --verbose counter.
      log_file: default value of --log_file.
      **kwargs: forwarded to optparse.OptionParser; 'description' defaults to
                the docstring of the __main__ module.
    """
    kwargs.setdefault('description', sys.modules['__main__'].__doc__)
    optparse.OptionParser.__init__(self, **kwargs)
    self.add_option(
        '-v', '--verbose',
        action='count',
        default=verbose,
        help='Use multiple times to increase verbosity')
    self.add_option(
        '-l', '--log_file',
        default=log_file,
        help='The name of the file to store rotating log details.')

  def parse_args(self, *args, **kwargs):
    """Parses the arguments, then attaches log handlers to the root logger.

    Returns:
      The (options, args) tuple produced by optparse.OptionParser.parse_args.
    """
    options, args = optparse.OptionParser.parse_args(self, *args, **kwargs)

    # 0 -> ERROR, 1 -> INFO, 2 or more -> DEBUG.
    verbosity_levels = (logging.ERROR, logging.INFO, logging.DEBUG)
    console_level = verbosity_levels[
        min(options.verbose, len(verbosity_levels) - 1)]

    root_logger = logging.getLogger()

    console_handler = logging.StreamHandler()
    console_handler.setFormatter(logging.Formatter(
        '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
    console_handler.setLevel(console_level)
    root_logger.setLevel(console_level)
    root_logger.addHandler(console_handler)

    if not options.log_file:
      return options, args

    # This is necessary otherwise attached handler will miss the messages.
    root_logger.setLevel(logging.DEBUG)

    file_handler = logging.handlers.RotatingFileHandler(
        options.log_file,
        maxBytes=10 * 1024 * 1024,
        backupCount=5,
        encoding='utf-8')
    # log files are always at DEBUG level.
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(logging.Formatter(
        '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
    root_logger.addHandler(file_handler)

    return options, args
2143
2144
def main():
  """Parses the command line, then either downloads the files or runs the test.

  Returns:
    The value returned by download_test_data() when --download is given;
    otherwise the value returned by run_tha_test(), or 1 when it raised an
    exception (the exception is logged).
  """
  disable_buffering()
  parser = OptionParserWithLogging(
      usage='%prog <options>', log_file=RUN_ISOLATED_LOG_FILE)

  download_group = optparse.OptionGroup(parser, 'Download')
  download_group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(download_group)

  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  source_group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  source_group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  parser.add_option_group(source_group)

  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  cache_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  cache_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=2*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  cache_group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(cache_group)

  options, args = parser.parse_args()

  # Exactly one of the two data sources must be given. Each problem is logged
  # before parser.error() is called so that it also reaches the log handlers.
  if bool(options.isolated) == bool(options.hash):
    source_error = 'One and only one of --isolated or --hash is required.'
    logging.debug(source_error)
    parser.error(source_error)
  if args:
    args_error = 'Unsupported args %s' % ' '.join(args)
    logging.debug(args_error)
    parser.error(args_error)

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  source = options.isolated or options.hash
  if options.download:
    return download_test_data(source, options.download, options.remote)

  try:
    return run_tha_test(source, options.cache, options.remote, policies)
  except Exception as e:
    # Make sure any exception is logged.
    logging.exception(e)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00002227
2228
if __name__ == '__main__':
  # Ensure that we are always running with the correct encoding. This must
  # happen before main() does any work.
  fix_default_encoding()
  exit_code = main()
  sys.exit(exit_code)