blob: 0c38a8428fe430d83910f694f9e2660875f0b6b9 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
13import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000014import httplib
maruel@chromium.orgedd25d02013-03-26 14:38:00 +000015import inspect
maruel@chromium.org2b2139a2013-04-30 20:14:58 +000016import itertools
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000017import json
csharp@chromium.orgbfb98742013-03-26 20:28:36 +000018import locale
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000019import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000020import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000021import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000022import optparse
23import os
24import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000025import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000026import re
27import shutil
28import stat
29import subprocess
30import sys
31import tempfile
32import threading
33import time
maruel@chromium.org97cd0be2013-03-13 14:01:36 +000034import traceback
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000035import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000036import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000037import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000038import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000039
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000040# Try to import 'upload' module used by AppEngineService for authentication.
41# If it is not there, app engine authentication support will be disabled.
try:
  from third_party import upload
except ImportError:
  # Without the module, app engine authentication stays disabled.
  upload = None
else:
  # Hack out upload logging.info(): give the module its own logger and only
  # let warnings and above through.
  upload.logging = logging.getLogger('upload')
  # Mac pylint choke on this line.
  upload.logging.setLevel(logging.WARNING)  # pylint: disable=E1103
50
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000051
# Types of action accepted by link_file().
HARDLINK, SYMLINK, COPY = 1, 2, 3

# Matches a string made of exactly 40 hexadecimal digits.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None

# The size of each chunk to read when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# The base directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# The name of the log to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# The name of the key to store the count of url attempts.
COUNT_KEY = 'UrlOpenAttempt'

# Default maximum number of attempts at opening a url before aborting.
URL_OPEN_MAX_ATTEMPTS = 30
# Default total time, in seconds, that may be spent retrying.
URL_OPEN_TIMEOUT = 360.

# Global (for now) map: server URL (http://example.com) -> HttpService instance.
# Used by get_http_service to cache HttpService instances.
_http_services = {}
_http_services_lock = threading.Lock()

# Used by get_flavor(): sys.platform value -> flavor name.
FLAVOR_MAPPING = dict(
    cygwin='win',
    win32='win',
    darwin='mac',
    sunos5='solaris',
    freebsd7='freebsd',
    freebsd8='freebsd',
)
100
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000101
class ConfigError(ValueError):
  """Raised when a .isolated file can't be loaded."""
105
106
class MappingError(OSError):
  """Raised when the tree can't be recreated."""
110
111
def get_flavor():
  """Returns the system default flavor. Copied from gyp/pylib/gyp/common.py.

  Any sys.platform value missing from FLAVOR_MAPPING is reported as 'linux'.
  """
  platform = sys.platform
  return FLAVOR_MAPPING.get(platform, 'linux')
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000115
116
def fix_default_encoding():
  """Forces utf8 solidly on all platforms.

  By default python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Returns False when the default encoding was already utf-8 (nothing is
  touched in that case), True once the encoding and locales were updated.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Regenerate setdefaultencoding.
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  # pylint: disable=E1101
  sys.setdefaultencoding('utf-8')

  category_names = [name for name in dir(locale) if name.startswith('LC_')]
  for name in category_names:
    category = getattr(locale, name)
    try:
      locale.setlocale(category, '')
    except locale.Error:
      continue
    try:
      lang = locale.getlocale(category)[0]
    except (TypeError, ValueError):
      continue
    if not lang:
      continue
    try:
      locale.setlocale(category, (lang, 'UTF-8'))
    except locale.Error:
      # Could not switch in-process; record the request in the environment.
      os.environ[name] = lang + '.UTF-8'

  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    pass
  return True
155
156
class Unbuffered(object):
  """Wraps a file object and flushes it whenever a newline is written."""

  def __init__(self, stream):
    self.stream = stream

  def __getattr__(self, attr):
    # Anything not defined on the wrapper is forwarded to the wrapped stream.
    return getattr(self.stream, attr)

  def write(self, data):
    """Writes |data|, then flushes if it contains a newline."""
    wrapped = self.stream
    wrapped.write(data)
    if '\n' not in data:
      return
    wrapped.flush()
169
170
def disable_buffering():
  """Makes this process and child processes stdout unbuffered.

  Does nothing when PYTHONUNBUFFERED is already set to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...
  sys.stdout = Unbuffered(sys.stdout)
  os.environ['PYTHONUNBUFFERED'] = 'x'
178
179
def os_link(source, link_name):
  """Add support for os.link() on Windows.

  Creates |link_name| as a hard link to |source|. Raises OSError on failure.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  created = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not created:
    raise OSError()
188
189
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and makes the copy readable by everyone."""
  shutil.copy(infile, outfile)
  read_for_all = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  os.chmod(outfile, os.stat(outfile).st_mode | read_for_all)
196
197
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile|; |action| selects the kind of mapping.

  |action| must be one of HARDLINK, SYMLINK or COPY, otherwise ValueError is
  raised. MappingError is raised when |infile| is not a file or when |outfile|
  is already a file.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    in_size = os.stat(infile).st_size
    out_size = os.stat(outfile).st_size
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, in_size, out_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    # On windows, symlink are converted to hardlink and fails over to copy.
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warning(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
224
225
def _set_write_bit(path, read_only):
  """Sets or resets the owner write bit on a file or directory.

  When |read_only| is true the mode is reduced to the owner read and execute
  bits (mode & 0500), which also drops every group/other permission. Otherwise
  the owner write bit is added to the current mode.

  Symlinks are modified through os.lchmod() when it is available; without it
  they are skipped so that the link target is never modified by accident.
  """
  original_mode = os.lstat(path).st_mode
  if read_only:
    mode = original_mode & 0o500
  else:
    mode = original_mode | 0o200

  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  # Test the mode as returned by lstat(): masking with 0500 above strips the
  # file type bits, so S_ISLNK() on the masked value never detects a symlink.
  if stat.S_ISLNK(original_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s' % path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
243
244
def make_writable(root, read_only):
  """Toggles the writable bit on everything below |root|.

  |root| must be an absolute path. Only the entries found while walking the
  tree are modified; |root| itself is not.
  """
  assert os.path.isabs(root), root
  for parent, subdirs, files in os.walk(root, topdown=True):
    # Files first, then directories, for each visited directory.
    for name in files + subdirs:
      _set_write_bit(os.path.join(parent, name), read_only)
254
255
def rmtree(root):
  """Wrapper around shutil.rmtree() to retry automatically on Windows.

  The tree is made writable first. On Windows the removal is tried up to 3
  times, sleeping 2, 4 then 6 seconds after each failure.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in range(3):
    try:
      shutil.rmtree(root)
    except WindowsError:  # pylint: disable=E0602
      pause = 2 * (attempt + 1)
      print >> sys.stderr, (
          'The test has subprocess outliving it. Sleep %d seconds.' % pause)
      time.sleep(pause)
    else:
      break
271
272
def is_same_filesystem(path1, path2):
  """Returns True if both paths are on the same filesystem.

  This is required to enable the use of hardlinks. Both paths must be
  absolute.
  """
  both = (path1, path2)
  for candidate in both:
    assert os.path.isabs(candidate), candidate
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for candidate in both:
      assert re.match(r'^[a-zA-Z]\:\\.*', candidate), candidate
    drive1 = path1[0].lower()
    drive2 = path2[0].lower()
    if drive1 != drive2:
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
289
290
def get_free_space(path):
  """Returns the number of free bytes on the volume holding |path|."""
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  total_free = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(total_free))
  return total_free.value
301
302
def make_temp_dir(prefix, root_dir):
  """Returns a temporary directory on the same file system as root_dir.

  The default temporary directory is used when it shares a file system with
  |root_dir|; otherwise the directory is created next to |root_dir|.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent = None
  else:
    parent = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent)
309
310
def load_isolated(content):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  |content| is the json encoded text of a .isolated file. The top level value
  must be a dict and only the following keys are accepted:
    - 'command': non-empty list of strings.
    - 'files': dict mapping a string to a dict whose keys are limited to
      'l' (link, a string), 'h' (sha-1, a 40 hex digits string), 'm' and 's'
      (both ints). 'h' and 'l' are mutually exclusive.
    - 'includes': non-empty list of sha-1 strings.
    - 'read_only': bool.
    - 'relative_cwd': string.
    - 'os': must be equal to get_flavor().

  Returns the decoded dict. Raises ConfigError on any violation, including
  values of the wrong type where a sha-1 is expected.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            # Check the type first: re.match() raises TypeError on anything
            # that is not a string, which would escape as the wrong exception.
            if (not isinstance(subsubvalue, basestring) or
                not RE_IS_SHA1.match(subsubvalue)):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        # Same type guard as for 'h' above.
        if (not isinstance(subvalue, basestring) or
            not RE_IS_SHA1.match(subvalue)):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
388
389
def fix_python_path(cmd):
  """Returns a copy of |cmd| that runs with the current python executable.

  A leading 'python' is replaced by sys.executable; when the first item is a
  .py script, sys.executable is inserted in front of it. |cmd| itself is left
  untouched.
  """
  fixed = cmd[:]
  program = fixed[0]
  if program == 'python':
    fixed[0] = sys.executable
    return fixed
  if program.endswith('.py'):
    fixed.insert(0, sys.executable)
  return fixed
398
399
def url_open(url, **kwargs):
  """Attempts to open the given url multiple times.

  The url is split into its server and request parts, then the request is
  sent through the HttpService instance of that server. |kwargs| is handed
  over to its request() method unchanged.

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns a file-like object, where the response may be read from, or None
  if it was unable to connect.
  """
  server, request_path = split_server_request_url(url)
  return get_http_service(server).request(request_path, **kwargs)
415
416
def split_server_request_url(url):
  """Splits the url into scheme+netloc and path+params+query+fragment.

  Returns a tuple (urlhost, urlpath).
  """
  parsed = urlparse.urlparse(url)
  urlhost = '%s://%s' % (parsed.scheme, parsed.netloc)
  remainder = ['', '', parsed.path, parsed.params, parsed.query,
               parsed.fragment]
  return urlhost, urlparse.urlunparse(remainder)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000423
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000424
def get_http_service(urlhost):
  """Returns existing or creates new instance of HttpService that can send
  requests to given base urlhost.

  Instances are cached per server in _http_services, under the lower-cased
  urlhost without trailing '/'.
  """
  # Ensure consistency.
  normalized = str(urlhost).lower().rstrip('/')
  with _http_services_lock:
    cached = _http_services.get(normalized)
    if cached:
      return cached
    created = AppEngineService(normalized)
    _http_services[normalized] = created
    return created
437
438
class HttpService(object):
  """Base class for a class that provides an API to HTTP based service:
    - Provides 'request' method.
    - Supports automatic request retries.
    - Supports persistent cookies.
    - Thread safe.
  """

  # File to use to store all auth cookies.
  COOKIE_FILE = os.path.join('~', '.isolated_cookies')

  # CookieJar reused by all services + lock that protects its instantiation.
  _cookie_jar = None
  _cookie_jar_lock = threading.Lock()

  def __init__(self, urlhost):
    """|urlhost| is the scheme+netloc all the requests are sent to."""
    self.urlhost = urlhost
    self.cookie_jar = self.load_cookie_jar()
    self.opener = self.create_url_opener()

  def authenticate(self):  # pylint: disable=R0201
    """Called when HTTP server asks client to authenticate.

    Can be implemented in subclasses. Returns True when the authentication
    succeeded; this base implementation always returns False.
    """
    return False

  @staticmethod
  def load_cookie_jar():
    """Returns global CookieJar object that stores cookies in the file.

    The jar is created and loaded on the first call only.
    """
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        return HttpService._cookie_jar
      jar = ThreadSafeCookieJar(os.path.expanduser(HttpService.COOKIE_FILE))
      jar.load()
      HttpService._cookie_jar = jar
      return jar

  @staticmethod
  def save_cookie_jar():
    """Called when cookie jar needs to be flushed to disk."""
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        HttpService._cookie_jar.save()

  def create_url_opener(self):  # pylint: disable=R0201
    """Returns OpenerDirector that will be used when sending requests.

    Can be reimplemented in subclasses.
    """
    return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))

  def request(self, urlpath, data=None, content_type=None, **kwargs):
    """Attempts to open the given url multiple times.

    |urlpath| is relative to the server root, i.e. '/some/request?param=1'.

    |data| can be either:
      -None for a GET request
      -str for pre-encoded data
      -list for data to be encoded
      -dict for data to be encoded (COUNT_KEY will be added in this case)

    |content_type| can only be used along with |data|.

    |kwargs| is passed as-is to _retry_loop(): max_attempts, retry_404,
    retry_50x and timeout.

    Returns a file-like object, where the response may be read from, or None
    if it was unable to connect.
    """
    assert urlpath and urlpath[0] == '/'

    if isinstance(data, dict) and COUNT_KEY in data:
      logging.error('%s already existed in the data passed into UlrOpen. It '
                    'would be overwritten. Aborting UrlOpen', COUNT_KEY)
      return None

    method = 'GET' if data is None else 'POST'
    assert not ((method != 'POST') and content_type), (
        'Can\'t use content_type on GET')

    def make_request(extra):
      """Returns a urllib2.Request instance for this specific retry."""
      if isinstance(data, str) or data is None:
        payload = data
      else:
        if isinstance(data, dict):
          payload = data.items()
        else:
          payload = data[:]
        payload.extend(extra.iteritems())
        payload = urllib.urlencode(payload)
      new_url = urlparse.urljoin(self.urlhost, urlpath[1:])
      if isinstance(data, str) or data is None:
        # In these cases, add the extra parameter to the query part of the url.
        url_parts = list(urlparse.urlparse(new_url))
        # Append the query parameter.
        if url_parts[4] and extra:
          url_parts[4] += '&'
        url_parts[4] += urllib.urlencode(extra)
        new_url = urlparse.urlunparse(url_parts)
      request = urllib2.Request(new_url, data=payload)
      if payload is not None:
        if content_type:
          request.add_header('Content-Type', content_type)
        request.add_header('Content-Length', len(payload))
      return request

    return self._retry_loop(make_request, **kwargs)

  def _retry_loop(
      self,
      make_request,
      max_attempts=URL_OPEN_MAX_ATTEMPTS,
      retry_404=False,
      retry_50x=True,
      timeout=URL_OPEN_TIMEOUT):
    """Runs internal request-retry loop.

    - |make_request| is called with a dict of extra parameters (empty on the
      first attempt, {COUNT_KEY: attempt} afterward) and must return the
      urllib2.Request to send.
    - Optionally retries HTTP 404 and 50x.
    - Retries up to |max_attempts| times. If None or 0, there's no limit in the
      number of retries.
    - Retries up to |timeout| duration in seconds. If None or 0, there's no
      limit in the time taken to do retries.
    - If both |max_attempts| and |timeout| are None or 0, this functions retries
      indefinitely.
    - HTTP 401 and 403 trigger a single call to authenticate(); the request is
      retried immediately when it succeeds.
    - Sleeps between two attempts, but never after the last one.

    Returns the file-like response object on success, None otherwise.
    """
    authenticated = False
    last_error = None
    request = None
    attempts_made = 0
    start = self._now()
    for attempt in itertools.count():
      if max_attempts and attempt >= max_attempts:
        # Too many attempts.
        break
      if timeout and (self._now() - start) >= timeout:
        # Retried for too long.
        break
      extra = {COUNT_KEY: attempt} if attempt else {}
      request = make_request(extra)
      attempts_made = attempt + 1
      try:
        url_response = self._url_open(request)
        logging.debug('url_open(%s) succeeded', request.get_full_url())
        return url_response
      except urllib2.HTTPError as e:
        # Unauthorized. Ask to authenticate and then try again.
        if e.code in (401, 403):
          # Try to authenticate only once. If it doesn't help, then server does
          # not support app engine authentication.
          logging.error(
              'Authentication is required for %s on attempt %d.\n%s',
              request.get_full_url(), attempt,
              self._format_exception(e, verbose=True))
          if not authenticated and self.authenticate():
            authenticated = True
            # Do not sleep.
            continue
          # If authentication failed, return.
          logging.error(
              'Unable to authenticate to %s.\n%s',
              request.get_full_url(), self._format_exception(e, verbose=True))
          return None

        if ((e.code < 500 and not (retry_404 and e.code == 404)) or
            (e.code >= 500 and not retry_50x)):
          # This HTTPError means we reached the server and there was a problem
          # with the request, so don't retry.
          logging.error(
              'Able to connect to %s but an exception was thrown.\n%s',
              request.get_full_url(), self._format_exception(e, verbose=True))
          return None

        # The HTTPError was due to a server error, so retry the attempt.
        logging.warning('Able to connect to %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      except (urllib2.URLError, httplib.HTTPException) as e:
        logging.warning('Unable to open url %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      # Only sleep if we are going to try again. |attempt| is 0-based, so the
      # last permitted attempt is |max_attempts| - 1.
      if max_attempts and attempt + 1 >= max_attempts:
        break
      # An unlimited number of attempts must still be paced by a sleep,
      # otherwise the loop would hammer the server without any pause.
      remaining = None
      if timeout:
        remaining = timeout - (self._now() - start)
        if remaining <= 0:
          break
      self.sleep_before_retry(attempt, remaining)

    # |request| is still None when the loop gave up before the first attempt.
    url = request.get_full_url() if request is not None else self.urlhost
    logging.error('Unable to open given url, %s, after %d attempts.\n%s',
                  url, attempts_made,
                  self._format_exception(last_error, verbose=True))
    return None

  def _url_open(self, request):
    """Low level method to execute urllib2.Request's.

    To be mocked in tests.
    """
    return self.opener.open(request)

  @staticmethod
  def _now():
    """Returns the current time in seconds. To be mocked in tests."""
    return time.time()

  @staticmethod
  def calculate_sleep_before_retry(attempt, max_duration):
    """Returns how long to sleep, in seconds, before retrying after |attempt|.

    The duration grows with |attempt|, is randomized, and is capped at 10
    seconds and at |max_duration| when it is set.
    """
    # Maximum sleeping time. We're hammering a cloud-distributed service, it'll
    # survive.
    MAX_SLEEP = 10.
    # random.random() returns [0.0, 1.0). Starts with relatively short waiting
    # time by starting with 1.5/2+1.5^-1 median offset.
    duration = (random.random() * 1.5) + math.pow(1.5, (attempt - 1))
    assert duration > 0.1
    duration = min(MAX_SLEEP, duration)
    if max_duration:
      duration = min(max_duration, duration)
    return duration

  @classmethod
  def sleep_before_retry(cls, attempt, max_duration):
    """Sleeps for some amount of time when retrying the request.

    To be mocked in tests.
    """
    time.sleep(cls.calculate_sleep_before_retry(attempt, max_duration))

  @staticmethod
  def _format_exception(exc, verbose=False):
    """Given an instance of some exception raised by urlopen returns human
    readable piece of text with detailed information about the error.

    With |verbose|, the headers not starting with 'x-' and the body of an
    urllib2.HTTPError are included; reading the body consumes it.
    """
    out = ['Exception: %s' % (exc,)]
    if verbose:
      if isinstance(exc, urllib2.HTTPError):
        out.append('-' * 10)
        if exc.hdrs:
          for header, value in exc.hdrs.items():
            if not header.startswith('x-'):
              out.append('%s: %s' % (header.capitalize(), value))
          out.append('')
        out.append(exc.read() or '<empty body>')
        out.append('-' * 10)
    return '\n'.join(out)
681
maruel@chromium.orgef333122013-03-12 20:36:40 +0000682
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000683class AppEngineService(HttpService):
684 """This class implements authentication support for
685 an app engine based services.
maruel@chromium.orgef333122013-03-12 20:36:40 +0000686 """
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000687
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000688 # This lock ensures that user won't be confused with multiple concurrent
689 # login prompts.
690 _auth_lock = threading.Lock()
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000691
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000692 def __init__(self, urlhost, email=None, password=None):
693 super(AppEngineService, self).__init__(urlhost)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000694 self.email = email
695 self.password = password
696 self._keyring = None
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000697
def authenticate(self):
  """Logs in to the app engine application.

  Returns True on success. Returns False right away when the 'upload' module
  could not be imported, since it provides the authentication code.
  """
  if not upload:
    logging.error('\'upload\' module is missing, '
                  'app engine authentication is disabled.')
    return False

  # Captured here so that the nested class below can reach them; inside its
  # methods 'self' is the RPC server, not this service.
  jar = self.cookie_jar
  persist_cookies = self.save_cookie_jar

  class AuthServer(upload.AbstractRpcServer):
    """RPC server that uses AuthenticationSupport's cookie jar."""

    def _GetOpener(self):
      # Authentication code needs to know about 302 response.
      # So make OpenerDirector without HTTPRedirectHandler.
      director = urllib2.OpenerDirector()
      handler_factories = (
          urllib2.ProxyHandler,
          urllib2.UnknownHandler,
          urllib2.HTTPHandler,
          urllib2.HTTPDefaultErrorHandler,
          urllib2.HTTPSHandler,
          urllib2.HTTPErrorProcessor,
      )
      for make_handler in handler_factories:
        director.add_handler(make_handler())
      director.add_handler(urllib2.HTTPCookieProcessor(jar))
      return director

    def PerformAuthentication(self):
      self._Authenticate()
      persist_cookies()
      return self.authenticated

  # Serialize authentication so that the user never faces several login
  # prompts at the same time.
  with AppEngineService._auth_lock:
    server = AuthServer(self.urlhost, self.get_credentials)
    return server.PerformAuthentication()
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000729
def get_credentials(self):
  """Returns the tuple (email, password) to authenticate with.

  Called during the authentication process, possibly multiple times if
  authentication fails. Credentials set explicitly on the instance are used
  when both are present; otherwise they come from an upload.KeyringCreds
  object that is created once and kept in self._keyring.
  """
  # 'authenticate' only gets here when 'upload' is present. Make any other
  # caller fail with a clear message when it is missing.
  assert upload, '\'upload\' module is required for this to work'
  if not (self.email and self.password):
    if not self._keyring:
      self._keyring = upload.KeyringCreds(
          self.urlhost, self.urlhost, self.email)
    return self._keyring.GetUserCredentials()
  return (self.email, self.password)
745
746
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save.

  Both operations run under the jar's own lock and are best effort: file
  system failures are logged instead of being raised.
  """

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists, creates an empty file if not.

    |filename| defaults to self.filename and gets '~' expanded. In both cases
    the file mode is then restricted to the owner (0600).
    """
    filename = os.path.expanduser(filename or self.filename)
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(
              self, filename, ignore_discard, ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError) as e:
          # Still best effort, the jar simply stays as it was, but leave a
          # trace so that a corrupted cookie file can be diagnosed.
          logging.warning('Failed to load cookies from %s: %s', filename, e)
      else:
        try:
          fd = os.open(filename, os.O_CREAT, 0o600)
          os.close(fd)
        except OSError:
          logging.error('Failed to create %s', filename)
      try:
        os.chmod(filename, 0o600)
      except OSError:
        logging.error('Failed to fix mode for %s', filename)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it.

    |filename| defaults to self.filename and gets '~' expanded exactly like
    load() does, so that both methods always address the same file.
    """
    target = filename or self.filename
    if target is not None:
      target = os.path.expanduser(target)
    logging.debug('Saving cookies to %s', target)
    with self._cookies_lock:
      try:
        cookielib.MozillaCookieJar.save(
            self, target, ignore_discard, ignore_expires)
      except (IOError, OSError) as e:
        # Failing to open the file raises IOError, which is not an OSError on
        # python 2, so both have to be listed.
        logging.error('Failed to save %s: %s', target, e)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000783
784
class ThreadPoolError(Exception):
  """Root of the exceptions a ThreadPool raises on its own behalf."""


class ThreadPoolEmpty(ThreadPoolError):
  """A task result was requested while the pool has no pending task."""


class ThreadPoolClosed(ThreadPoolError):
  """An operation was attempted on a thread pool that is already closed."""
795
796
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000797class ThreadPool(object):
798 """Implements a multithreaded worker pool oriented for mapping jobs with
799 thread-local result storage.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000800
801 Arguments:
802 - initial_threads: Number of threads to start immediately. Can be 0 if it is
803 uncertain that threads will be needed.
804 - max_threads: Maximum number of threads that will be started when all the
805 threads are busy working. Often the number of CPU cores.
806 - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
807 queue. A non-zero value may make add_task() blocking.
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000808 - prefix: Prefix to use for thread names. Pool's threads will be
809 named '<prefix>-<thread index>'.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000810 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000811 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000812
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000813 def __init__(self, initial_threads, max_threads, queue_size, prefix=None):
814 prefix = prefix or 'tp-0x%0x' % id(self)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000815 logging.debug(
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000816 'New ThreadPool(%d, %d, %d): %s', initial_threads, max_threads,
817 queue_size, prefix)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000818 assert initial_threads <= max_threads
819 # Update this check once 256 cores CPU are common.
820 assert max_threads <= 256
821
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000822 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000823 self._max_threads = max_threads
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000824 self._prefix = prefix
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000825
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000826 # Mutables.
827 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000828 self._num_of_added_tasks = 0
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000829 self._outputs_exceptions_cond = threading.Condition()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000830 self._outputs = []
831 self._exceptions = []
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000832
833 # List of threads, number of threads in wait state, number of terminated and
834 # starting threads. All protected by _workers_lock.
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000835 self._workers_lock = threading.Lock()
836 self._workers = []
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000837 self._ready = 0
838 # Number of terminated threads, used to handle some edge cases in
839 # _is_task_queue_empty.
840 self._dead = 0
841 # Number of threads already added to _workers, but not yet running the loop.
842 self._starting = 0
843 # True if close was called. Forbids adding new tasks.
844 self._is_closed = False
845
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000846 for _ in range(initial_threads):
847 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000848
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000849 def _add_worker(self):
850 """Adds one worker thread if there isn't too many. Thread-safe."""
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000851 with self._workers_lock:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000852 if len(self._workers) >= self._max_threads or self._is_closed:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000853 return False
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000854 worker = threading.Thread(
855 name='%s-%d' % (self._prefix, len(self._workers)), target=self._run)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000856 self._workers.append(worker)
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000857 self._starting += 1
858 logging.debug('Starting worker thread %s', worker.name)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000859 worker.daemon = True
860 worker.start()
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000861 return True
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000862
maruel@chromium.org831958f2013-01-22 15:01:46 +0000863 def add_task(self, priority, func, *args, **kwargs):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000864 """Adds a task, a function to be executed by a worker.
865
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000866 |priority| can adjust the priority of the task versus others. Lower priority
maruel@chromium.org831958f2013-01-22 15:01:46 +0000867 takes precedence.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000868
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000869 |func| can either return a return value to be added to the output list or
870 be a generator which can emit multiple values.
871
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000872 Returns the index of the item added, e.g. the total number of enqueued items
873 up to now.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000874 """
maruel@chromium.org831958f2013-01-22 15:01:46 +0000875 assert isinstance(priority, int)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000876 assert callable(func)
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000877 with self._workers_lock:
878 if self._is_closed:
879 raise ThreadPoolClosed('Can not add a task to a closed ThreadPool')
880 start_new_worker = (
881 # Pending task count plus new task > number of available workers.
882 self.tasks.qsize() + 1 > self._ready + self._starting and
883 # Enough slots.
884 len(self._workers) < self._max_threads
885 )
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000886 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000887 self._num_of_added_tasks += 1
888 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000889 self.tasks.put((priority, index, func, args, kwargs))
890 if start_new_worker:
891 self._add_worker()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000892 return index
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000893
894 def _run(self):
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000895 """Worker thread loop. Runs until a None task is queued."""
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000896 started = False
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000897 while True:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000898 try:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000899 with self._workers_lock:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000900 self._ready += 1
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000901 if not started:
902 self._starting -= 1
903 started = True
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000904 task = self.tasks.get()
905 finally:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000906 with self._workers_lock:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000907 self._ready -= 1
908 try:
909 if task is None:
910 # We're done.
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000911 with self._workers_lock:
912 self._dead += 1
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000913 return
914 _priority, _index, func, args, kwargs = task
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000915 if inspect.isgeneratorfunction(func):
916 for out in func(*args, **kwargs):
917 self._output_append(out)
918 else:
919 out = func(*args, **kwargs)
920 self._output_append(out)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000921 except Exception as e:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000922 logging.warning('Caught exception: %s', e)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000923 exc_info = sys.exc_info()
maruel@chromium.org97cd0be2013-03-13 14:01:36 +0000924 logging.info(''.join(traceback.format_tb(exc_info[2])))
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000925 self._outputs_exceptions_cond.acquire()
926 try:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000927 self._exceptions.append(exc_info)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000928 self._outputs_exceptions_cond.notifyAll()
929 finally:
930 self._outputs_exceptions_cond.release()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000931 finally:
csharp@chromium.org60991182013-03-18 13:44:17 +0000932 try:
933 self.tasks.task_done()
934 except Exception as e:
935 # We need to catch and log this error here because this is the root
936 # function for the thread, nothing higher will catch the error.
937 logging.exception('Caught exception while marking task as done: %s',
938 e)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000939
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000940 def _output_append(self, out):
941 if out is not None:
942 self._outputs_exceptions_cond.acquire()
943 try:
944 self._outputs.append(out)
945 self._outputs_exceptions_cond.notifyAll()
946 finally:
947 self._outputs_exceptions_cond.release()
948
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000949 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000950 """Extracts all the results from each threads unordered.
951
952 Call repeatedly to extract all the exceptions if desired.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000953
954 Note: will wait for all work items to be done before returning an exception.
955 To get an exception early, use get_one_result().
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000956 """
957 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000958 self.tasks.join()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000959 self._outputs_exceptions_cond.acquire()
960 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000961 if self._exceptions:
962 e = self._exceptions.pop(0)
963 raise e[0], e[1], e[2]
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000964 out = self._outputs
965 self._outputs = []
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000966 finally:
967 self._outputs_exceptions_cond.release()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000968 return out
969
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000970 def get_one_result(self):
971 """Returns the next item that was generated or raises an exception if one
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000972 occurred.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000973
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000974 Raises:
975 ThreadPoolEmpty - no results available.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000976 """
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000977 # Get first available result.
978 for result in self.iter_results():
979 return result
980 # No results -> tasks queue is empty.
981 raise ThreadPoolEmpty('Task queue is empty')
982
983 def iter_results(self):
984 """Yields results as they appear until all tasks are processed."""
985 while True:
986 # Check for pending results.
987 result = None
988 self._outputs_exceptions_cond.acquire()
989 try:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000990 if self._exceptions:
991 e = self._exceptions.pop(0)
992 raise e[0], e[1], e[2]
993 if self._outputs:
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +0000994 # Remember the result to yield it outside of the lock.
995 result = self._outputs.pop(0)
996 else:
997 # No pending results and no pending tasks -> all tasks are done.
998 if self._is_task_queue_empty():
999 return
1000 # Some task is queued, wait for its result to appear.
1001 # Use non-None timeout so that process reacts to Ctrl+C and other
1002 # signals, see http://bugs.python.org/issue8844.
1003 self._outputs_exceptions_cond.wait(timeout=5)
1004 continue
1005 finally:
1006 self._outputs_exceptions_cond.release()
1007 yield result
1008
1009 def _is_task_queue_empty(self):
1010 """True if task queue is empty and all workers are idle.
1011
1012 Doesn't check for pending results from already finished tasks.
1013
1014 Note: this property is not reliable in case tasks are still being
1015 enqueued by concurrent threads.
1016 """
1017 # Some pending tasks?
1018 if not self.tasks.empty():
1019 return False
1020 # Some workers are busy?
1021 with self._workers_lock:
1022 idle = self._ready + self._dead + self._starting
1023 if idle != len(self._workers):
1024 return False
1025 return True
maruel@chromium.org13eca0b2013-01-22 16:42:21 +00001026
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001027 def close(self):
1028 """Closes all the threads."""
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001029 # Ensure no new threads can be started, self._workers is effectively
1030 # a constant after that and can be accessed outside the lock.
1031 with self._workers_lock:
1032 if self._is_closed:
1033 raise ThreadPoolClosed('Can not close already closed ThreadPool')
1034 self._is_closed = True
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001035 for _ in range(len(self._workers)):
1036 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +00001037 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001038 for t in self._workers:
1039 t.join()
vadimsh@chromium.org53f8d5a2013-06-19 13:03:55 +00001040 logging.debug(
1041 'Thread pool \'%s\' closed: spawned %d threads total',
1042 self._prefix, len(self._workers))
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001043
1044 def __enter__(self):
1045 """Enables 'with' statement."""
1046 return self
1047
maruel@chromium.org97cd0be2013-03-13 14:01:36 +00001048 def __exit__(self, _exc_type, _exc_value, _traceback):
maruel@chromium.org8df128b2012-11-08 19:05:04 +00001049 """Enables 'with' statement."""
1050 self.close()
1051
1052
def valid_file(filepath, size):
  """Tells whether the given file appears valid, which currently only means
  that its size on disk is |size|.

  Any file is accepted, without being looked at, when |size| is
  UNKNOWN_FILE_SIZE.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  actual_size = os.stat(filepath).st_size
  matches = size == actual_size
  if not matches:
    logging.warning(
        'Found invalid item %s; %d != %d',
        os.path.basename(filepath), actual_size, size)
  return matches
csharp@chromium.org8dc52542012-11-08 20:29:55 +00001065
1066
class Profiler(object):
  """Context manager that logs the wall clock time spent in a named section."""

  def __init__(self, name):
    # Set when the section is entered.
    self.start_time = None
    self.name = name

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
1080
1081
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server.

  The function doing the actual transfer is picked by get_file_handler() and
  takes two inputs: the item and its destination.

  Supports local file system, CIFS or http remotes.

  Items with equal priority are processed in strict FIFO order.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The low 8 bits are reserved to count the retries.
  LOW = 1 << 8
  MED = 2 << 8
  HIGH = 3 << 8
  INTERNAL_PRIORITY_BITS = (1 << 8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    # Function to fetch a remote object or upload to a remote location.
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = ThreadPool(
        self.INITIAL_WORKERS, self.MAX_WORKERS, 0, 'upload')

  def join(self):
    """Blocks until the queue is empty and returns what the pool's join()
    returns."""
    return self._pool.join()

  def close(self):
    """Terminates all worker threads."""
    self._pool.close()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first. Its low bits must be clear,
    they are used internally to count the retries.

    Thread-safe.
    """
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Queues the transfer of |obj| to |dest| without checking |priority|."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns the next available result of the underlying pool."""
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success. Returns None when the item was queued again for
    another attempt. Re-raises the IOError once the retries are exhausted.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      attempts = priority & self.INTERNAL_PRIORITY_BITS
      if attempts >= self.RETRIES:
        raise
      # Retry, lowering the priority by one notch.
      self._add_item(priority + 1, obj, dest, size)
      return None

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a function(item, dest) that retrieves objects from a remote.

    An http(s) url gets a function that downloads and zlib-decompresses the
    item; anything else is treated as a directory to copy files from.
    """
    if not re.match(r'^https?://.+$', file_or_url):
      def copy_file(item, dest):
        source = os.path.join(file_or_url, item)
        if source == dest:
          logging.info('Source and destination are the same, no action required')
          return
        logging.debug('copy_file(%s, %s)', source, dest)
        shutil.copy(source, dest)
      return copy_file

    def download_file(item, dest):
      # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
      # easy.
      try:
        zipped_source = file_or_url + item
        logging.debug('download_file(%s)', zipped_source)

        # Because the app engine DB is only eventually consistent, retry
        # 404 errors because the file might just not be visible yet (even
        # though it has been uploaded).
        connection = url_open(zipped_source, retry_404=True)
        if not connection:
          raise IOError('Unable to open connection to %s' % zipped_source)
        decompressor = zlib.decompressobj()
        # Number of compressed bytes read so far.
        size = 0
        with open(dest, 'wb') as f:
          chunk = connection.read(ZIPPED_FILE_CHUNK)
          while chunk:
            size += len(chunk)
            f.write(decompressor.decompress(chunk))
            chunk = connection.read(ZIPPED_FILE_CHUNK)
        # Ensure that all the data was properly decompressed.
        leftover = decompressor.flush()
        assert not leftover
      except IOError as e:
        logging.error(
            'Failed to download %s at %s.\n%s', item, dest, e)
        raise
      except httplib.HTTPException as e:
        msg = 'HTTPException while retrieving %s at %s.\n%s' % (
            item, dest, e)
        logging.error(msg)
        raise IOError(msg)
      except zlib.error as e:
        remaining_size = len(connection.read())
        msg = 'Corrupted zlib for item %s. Processed %d of %d bytes.\n%s' % (
            item, size, size + remaining_size, e)
        logging.error(msg)

        # Testing seems to show that if a few machines are trying to download
        # the same blob, they can cause each other to fail. So if we hit a
        # zip error, this is the most likely cause (it only downloads some of
        # the data). Randomly sleep for between 5 and 25 seconds to try and
        # spread out the downloads.
        # TODO(csharp): Switch from blobstorage to cloud storage and see if
        # that solves the issue.
        time.sleep((random.random() * 20) + 5)

        raise IOError(msg)

    return download_file
1221
1222
class CachePolicies(object):
  """Plain holder of the limits that decide when the cache gets trimmed."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Size above which the cache is trimmed. 0 disables the
                      limit, the cache is then effectively a leak.
    - min_free_space: Free disk space below which the cache is trimmed. 0
                      disables the limit, the disk may then be filled up.
    - max_items: Maximum number of items kept in the cache. 0 disables the
                 limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
1237
1238
class NoCache(object):
  """This class is intended to be usable everywhere the Cache class is.
  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    """
    Arguments:
    - target_directory: Directory the items are downloaded into.
    - remote: Object used to fetch the items; add_item() and get_one_result()
              are called on it.
    """
    self.target_directory = target_directory
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Gets the requested file and waits for one result of the remote."""
    self.remote.add_item(priority, item, self.path(item), size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Downloads the first item of |items| if it is missing and returns it.

    |items| can be any iterable of item names; iterating over a dict walks its
    keys, so a dict keyed by item name works as it always did. Raises
    StopIteration if |items| is empty.
    """
    item = next(iter(items))

    if not os.path.exists(self.path(item)):
      self.remote.add_item(Remote.MED, item, self.path(item), UNKNOWN_FILE_SIZE)
      downloaded = self.remote.get_one_result()
      assert downloaded == item

    return item

  def path(self, item):
    """Returns the path of |item| inside the target directory."""
    return os.path.join(self.target_directory, item)
1267
1268
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001269class Cache(object):
1270 """Stateful LRU cache.
1271
1272 Saves its state as json file.
1273 """
1274 STATE_FILE = 'state.json'
1275
def __init__(self, cache_dir, remote, policies):
  """Loads the saved state and reconciles it with the content of |cache_dir|.

  Arguments:
  - cache_dir: Directory where to place the cache. Created if missing.
  - remote: Remote where to fetch items from.
  - policies: cache retention policies.

  A state file that cannot be read or has an unexpected structure is
  discarded. Files found in the directory but not in the state are either
  added back (names matching RE_IS_SHA1) or deleted, entries whose file is
  gone are dropped, then the cache is trimmed.
  """
  self.cache_dir = cache_dir
  self.remote = remote
  self.policies = policies
  self.state_file = os.path.join(cache_dir, self.STATE_FILE)
  # The tuple(file, size) are kept as an array in a LRU style. E.g.
  # self.state[0] is the oldest item.
  self.state = []
  self._state_need_to_be_saved = False
  # A lookup map to speed up searching.
  self._lookup = {}
  self._lookup_is_stale = True

  # Items currently being fetched. Keep it local to reduce lock contention.
  self._pending_queue = set()

  # Profiling values.
  self._added = []
  self._removed = []
  self._free_disk = 0

  with Profiler('Setup'):
    if not os.path.isdir(self.cache_dir):
      os.makedirs(self.cache_dir)
    if os.path.isfile(self.state_file):
      try:
        # Close the file deterministically instead of leaking the handle.
        with open(self.state_file, 'r') as f:
          self.state = json.load(f)
      except (IOError, ValueError) as e:
        # Too bad. The file will be overwritten and the cache cleared.
        logging.error(
            'Broken state file %s, ignoring.\n%s', self.STATE_FILE, e)
        self._state_need_to_be_saved = True
      if (not isinstance(self.state, list) or
          not all(
              isinstance(i, (list, tuple)) and len(i) == 2
              for i in self.state)):
        # Discard.
        self._state_need_to_be_saved = True
        self.state = []

    # Ensure that all files listed in the state still exist and add new ones.
    previous = set(filename for filename, _ in self.state)
    if len(previous) != len(self.state):
      logging.warning('Cache state is corrupted, found duplicate files')
      self._state_need_to_be_saved = True
      self.state = []
      # Nothing is tracked anymore. Forget the names too, otherwise the files
      # on disk would be skipped below as already known while being absent
      # from the state.
      previous = set()

    added = 0
    for filename in os.listdir(self.cache_dir):
      if filename == self.STATE_FILE:
        continue
      if filename in previous:
        previous.remove(filename)
        continue
      # An untracked file.
      if not RE_IS_SHA1.match(filename):
        logging.warning('Removing unknown file %s from cache', filename)
        os.remove(self.path(filename))
        continue
      # Insert as the oldest file. It will be deleted eventually if not
      # accessed.
      self._add(filename, False)
      logging.warning('Add unknown file %s to cache', filename)
      added += 1

    if added:
      logging.warning('Added back %d unknown files', added)
    if previous:
      # What is left in |previous| was listed in the state but not found on
      # disk.
      logging.warning('Removed %d lost files', len(previous))
      # Set explicitly in case self._add() wasn't called.
      self._state_need_to_be_saved = True
      # Filter out entries that were not found while keeping the previous
      # order.
      self.state = [
          (filename, size) for filename, size in self.state
          if filename not in previous
      ]
    self.trim()
1360
1361 def __enter__(self):
1362 return self
1363
def __exit__(self, _exc_type, _exec_value, _traceback):
  """Trims the cache one last time, then logs how much was added, is
  currently held and was removed, plus the recorded free disk space."""
  with Profiler('CleanupTrimming'):
    self.trim()

  added_kb = sum(self._added) / 1024
  logging.info('%5d (%8dkb) added', len(self._added), added_kb)

  current_kb = sum(entry[1] for entry in self.state) / 1024
  logging.info('%5d (%8dkb) current', len(self.state), current_kb)

  removed_kb = sum(self._removed) / 1024
  logging.info('%5d (%8dkb) removed', len(self._removed), removed_kb)

  logging.info('       %8dkb free', self._free_disk / 1024)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001377
def remove_file_at_index(self, index):
  """Drops the cache entry at |index| and deletes its file from disk.

  The entry is forgotten before the file is deleted, so a deletion failure is
  only logged: the entry stays untracked either way.
  """
  try:
    self._state_need_to_be_saved = True
    entry = self.state.pop(index)
    name, nbytes = entry
    # If the lookup was already stale, it's possible the filename was not
    # present yet.
    self._lookup_is_stale = True
    self._lookup.pop(name, None)
    self._removed.append(nbytes)
    doomed = self.path(name)
    os.remove(doomed)
  except OSError as error:
    logging.error('Error attempting to delete a file\n%s' % error)
1391
def remove_lru_file(self):
  """Removes the least recently used file, i.e. the first entry in the state."""
  oldest_index = 0
  self.remove_file_at_index(oldest_index)
1395
def trim(self):
  """Evicts the oldest entries until every cache policy is satisfied.

  Three policies are enforced in order: the maximum total cache size, the
  maximum number of items and the minimum free disk space. A policy whose
  value is 0 or None is disabled. The state file is saved at the end if
  anything changed.
  """
  policies = self.policies

  # Ensure maximum cache size. A running total is kept instead of summing the
  # whole state again after each eviction.
  if policies.max_cache_size and self.state:
    total = sum(size for _, size in self.state)
    while self.state and total > policies.max_cache_size:
      # remove_lru_file() evicts the first entry of self.state.
      total -= self.state[0][1]
      self.remove_lru_file()

  # Ensure maximum number of items in the cache.
  if policies.max_items and self.state:
    while len(self.state) > policies.max_items:
      self.remove_lru_file()

  # Ensure enough free space.
  self._free_disk = get_free_space(self.cache_dir)
  trimmed_due_to_space = False
  while (
      policies.min_free_space and
      self.state and
      self._free_disk < policies.min_free_space):
    trimmed_due_to_space = True
    self.remove_lru_file()
    self._free_disk = get_free_space(self.cache_dir)
  if trimmed_due_to_space:
    total = sum(size for _, size in self.state)
    # Usage is the current size relative to the maximum. The cache may have
    # been emptied (total == 0) and the size limit may be disabled
    # (max_cache_size == 0, reported as 0%); neither may raise here.
    if policies.max_cache_size:
      usage_percent = 100. * total / float(policies.max_cache_size)
    else:
      usage_percent = 0.
    logging.warning(
        'Trimmed due to not enough free disk space: %.1fkb free, %.1fkb '
        'cache (%.1f%% of its maximum capacity)',
        self._free_disk / 1024.,
        total / 1024.,
        usage_percent,
        )
  self.save()
1428
def retrieve(self, priority, item, size):
  """Makes sure |item| is, or will soon be, present in the cache.

  A cached copy is checked with valid_file() against |size|. A valid copy is
  moved to the most recently used position. An invalid one is removed and
  fetched again. An item that is not cached is queued on the remote with
  |priority|, unless it is already pending.
  """
  assert '/' not in item
  destination = self.path(item)
  self._update_lookup()
  position = self._lookup.get(item)

  if position is not None:
    if valid_file(destination, size):
      assert position < len(self.state)
      # Was already in cache. Update its LRU value by putting it at the end.
      self._state_need_to_be_saved = True
      self._lookup_is_stale = True
      self.state.append(self.state.pop(position))
      return
    # The cached copy is corrupted; drop it and fall through to fetch it.
    self.remove_file_at_index(position)

  if item in self._pending_queue:
    # Already pending. The same object could be referenced multiple times.
    return
  # TODO(maruel): It should look at the free disk space, the current cache
  # size and the size of the new item on every new item:
  # - Trim the cache as more entries are listed when free disk space is low,
  #   otherwise if the amount of data downloaded during the run > free disk
  #   space, it'll crash.
  # - Make sure there's enough free disk space to fit all dependencies of
  #   this run! If not, abort early.
  self.remote.add_item(priority, item, destination, size)
  self._pending_queue.add(item)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001465
def add(self, filepath, obj):
  """Forcibly adds the file at |filepath| to the cache under the name |obj|.

  Does nothing when |obj| is already tracked.
  """
  self._update_lookup()
  if obj in self._lookup:
    return
  link_file(self.path(obj), filepath, HARDLINK)
  self._add(obj, True)
1472
def path(self, item):
  """Returns the location of |item| inside the cache directory."""
  cache_root = self.cache_dir
  return os.path.join(cache_root, item)
1476
def save(self):
  """Saves the LRU ordering to the state file, if it changed.

  The state is written as compact JSON. Nothing is written when no change is
  pending.
  """
  if not self._state_need_to_be_saved:
    return
  # Close the file explicitly so the data is flushed and the handle released
  # as soon as the state is written.
  with open(self.state_file, 'wb') as f:
    json.dump(self.state, f, separators=(',', ':'))
  self._state_need_to_be_saved = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001482
def wait_for(self, items):
  """Blocks until at least one of |items| is available in the cache.

  Returns the first item of |items| found. Every item fetched while waiting
  is registered in the cache, even if it is not one of |items|.
  """
  # Flush items already present.
  self._update_lookup()
  for candidate in items:
    if candidate in self._lookup:
      return candidate

  pending = self._pending_queue
  assert all(i in pending for i in items), (items, pending)
  # Note that:
  #   len(self._pending_queue) ==
  #     ( len(self.remote._workers) - self.remote._ready +
  #       len(self._remote._queue) + len(self._remote.done))
  # There is no lock-free way to verify that.
  while pending:
    fetched = self.remote.get_one_result()
    pending.remove(fetched)
    self._add(fetched, True)
    if fetched in items:
      return fetched
  return None
1507
def _add(self, item, at_end):
  """Registers |item|, already present on disk, in the internal state.

  With |at_end| True the item becomes the most recently used entry. Otherwise
  it is inserted as the oldest entry, which makes self._lookup inconsistent
  until self._update_lookup() is called.
  """
  nbytes = os.stat(self.path(item)).st_size
  self._added.append(nbytes)
  self._state_need_to_be_saved = True
  entry = (item, nbytes)
  if not at_end:
    self._lookup_is_stale = True
    self.state.insert(0, entry)
    return
  self.state.append(entry)
  self._lookup[item] = len(self.state) - 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001523
def _update_lookup(self):
  """Rebuilds the filename -> index mapping from self.state when it is stale."""
  if not self._lookup_is_stale:
    return
  refreshed = {}
  for position, (name, _) in enumerate(self.state):
    refreshed[name] = position
  self._lookup = refreshed
  self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001529
1530
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""
  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing
    # a .isolated file in the hash table, is important, as the later ones are
    # not processed until the first ones are retrieved and read.
    self.can_fetch = False
    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Validates |content| as a .isolated file and stores the parsed json.

    One child IsolatedFile is created per entry of the 'includes' key. An
    instance can be loaded only once.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included = self.data.get('includes', [])
    self.children = [IsolatedFile(child_hash) for child_hash in included]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    declared = self.data.get('files', {})
    for filepath in declared:
      # Root isolated has priority on the files being mapped. In particular,
      # overridden files must not be fetched.
      if filepath in files:
        continue
      properties = declared[filepath]
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1585
1586
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Raises ConfigError if a .isolated file is included more than once, which
    covers recursive includes.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Keeps every hash ever requested to refuse recursive includes. |pending|
    # is keyed by hash, so the same .isolated cannot be tracked twice anyway.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node defining a property wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetchable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file as soon as it is read instead of leaking the handle.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        # Remember it; otherwise only an include of the root is ever caught.
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1668
1669
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files.

  |files| is an iterable of file paths relative to |base_directory|; only its
  length and its items are used. Every parent directory of every file is
  created under |base_directory|. Directories that already exist are left
  untouched, so the function can be run again on a populated destination.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collects the tree of directories to create.
  directories = set()
  for f in files:
    item = os.path.dirname(f)
    # Stopping on an already seen directory avoids walking the same parents
    # again. It also ends the walk for an absolute path, whose dirname
    # eventually stays constant.
    while item and item not in directories:
      directories.add(item)
      item = os.path.dirname(item)
  # Sorting guarantees a parent is created before its children.
  for d in sorted(directories):
    path = os.path.join(base_directory, d)
    if not os.path.isdir(path):
      os.mkdir(path)
1682
1683
def create_links(base_directory, files):
  """Creates the symlinks described by |files| under |base_directory|.

  |files| is an iterable of (filepath, properties) pairs. Only entries with a
  'l' (link target) property are processed. On Windows they are skipped with
  a warning.
  """
  for filepath, properties in files:
    if 'l' not in properties:
      continue
    if sys.platform == 'win32':
      # TODO(maruel): Create junctions or empty text files similar to what
      # cygwin do?
      logging.warning('Ignoring symlink %s', filepath)
      continue
    link_path = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'link' property should
    # never be specified for windows .isolated file.
    os.symlink(properties['l'], link_path)  # pylint: disable=E1101
    # lchmod() is not available on every platform; the mode is skipped then.
    if 'm' in properties and getattr(os, 'lchmod', None):
      os.lchmod(link_path, properties['m'])  # pylint: disable=E1101
1702
1703
def setup_commands(base_directory, cwd, cmd):
  """Prepares the working directory and the command used to run the test.

  |cwd| must be relative; it is resolved against |base_directory| and created
  if missing. |cmd| is modified in place: its first item gets native path
  separators. Returns the tuple (absolute working directory, command).
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  working_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(working_dir):
    os.makedirs(working_dir)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0].replace('/', os.path.sep)
  cmd[0] = executable
  return working_dir, fix_python_path(cmd)
1718
1719
def generate_remaining_files(files):
  """Groups the files still to be downloaded by their content hash.

  |files| is an iterable of (filepath, properties) pairs. Entries without a
  'h' property are ignored. Returns a dict mapping each hash to the list of
  (filepath, properties) pairs sharing it, in iteration order.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))
  return by_hash
1728
1729
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the test described by |isolated_hash| into |target_directory|.

  The files are fetched from |remote| and placed directly in the destination,
  without a persistent cache. The test is not run; the command to run it is
  printed instead. Returns 0 on success, 1 if the .isolated has no command.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds') as _prof:
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    print >> sys.stderr, 'No command to run'
    return 1

  with Profiler('GetRest') as _prof:
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      obj = no_cache.wait_for(remaining)
      files = remaining.pop(obj)
      last_index = len(files) - 1

      for i, (filepath, properties) in enumerate(files):
        outfile = os.path.join(target_directory, filepath)
        logging.info(no_cache.path(obj))

        # The downloaded blob is copied for every destination but the last
        # one, which takes the blob itself.
        if i == last_index:
          os.rename(no_cache.path(obj), outfile)
        else:
          shutil.copyfile(no_cache.path(obj), outfile)

        if sys.platform != 'win32' and 'm' in properties:
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in
          # that case.
          os.chmod(outfile, properties['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...' % len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1787
1788
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  |isolated_hash| is either the sha-1 of a .isolated file to fetch from
  |remote| or the path to a local .isolated file. Returns the exit code of the
  command, or 1 if the .isolated has no command. The temporary directory is
  always deleted.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest. The file is closed right after being hashed.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...' % len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      rmtree(outdir)
1853
1854
def main():
  """Parses the command line, sets up logging and downloads or runs the test.

  Returns the process exit code. In run mode, any exception is logged and
  turned into the exit code 1.
  """
  disable_buffering()
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')

  group = optparse.OptionGroup(parser, 'Download')
  group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  # Registered before the group is attached, like every other option.
  group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=2*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(group)

  options, args = parser.parse_args()
  # Each -v raises the console verbosity by one level, up to DEBUG.
  levels = [logging.WARNING, logging.INFO, logging.DEBUG]
  level = levels[min(len(levels) - 1, options.verbose)]

  logging_console = logging.StreamHandler()
  logging_console.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  logging_console.setLevel(level)
  logging.getLogger().addHandler(logging_console)

  # The log file always gets everything, whatever the console verbosity is.
  logging_rotating_file = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  logging_rotating_file.setLevel(logging.DEBUG)
  logging_rotating_file.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  logging.getLogger().addHandler(logging_rotating_file)

  logging.getLogger().setLevel(logging.DEBUG)

  # Usage errors are logged first since parser.error() exits the process.
  if bool(options.isolated) == bool(options.hash):
    logging.debug('One and only one of --isolated or --hash is required.')
    parser.error('One and only one of --isolated or --hash is required.')
  if args:
    logging.debug('Unsupported args %s' % ' '.join(args))
    parser.error('Unsupported args %s' % ' '.join(args))

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  if options.download:
    return download_test_data(options.isolated or options.hash,
                              options.download, options.remote)
  else:
    try:
      return run_tha_test(
          options.isolated or options.hash,
          options.cache,
          options.remote,
          policies)
    except Exception as e:
      # Make sure any exception is logged.
      logging.exception(e)
      return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001957
1958
if __name__ == '__main__':
  # The default encoding must be fixed before anything else runs.
  fix_default_encoding()
  exit_code = main()
  sys.exit(exit_code)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001962 sys.exit(main())