blob: 2b7bb22414fd1026d07fbae65f47b17523a591a9 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
13import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000014import httplib
maruel@chromium.orgedd25d02013-03-26 14:38:00 +000015import inspect
maruel@chromium.org2b2139a2013-04-30 20:14:58 +000016import itertools
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000017import json
csharp@chromium.orgbfb98742013-03-26 20:28:36 +000018import locale
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000019import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000020import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000021import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000022import optparse
23import os
24import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000025import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000026import re
27import shutil
28import stat
29import subprocess
30import sys
31import tempfile
32import threading
33import time
maruel@chromium.org97cd0be2013-03-13 14:01:36 +000034import traceback
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000035import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000036import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000037import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000038import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000039
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000040# Try to import 'upload' module used by AppEngineService for authentication.
41# If it is not there, app engine authentication support will be disabled.
# Optional dependency: the 'upload' module is used by AppEngineService for
# authentication. When it cannot be imported, |upload| is set to None and app
# engine authentication support is disabled.
try:
  from third_party import upload
except ImportError:
  upload = None
else:
  # Hack out upload logging.info() by giving the module its own logger that
  # only lets warnings and above through.
  upload.logging = logging.getLogger('upload')
  # Mac pylint choke on this line.
  upload.logging.setLevel(logging.WARNING)  # pylint: disable=E1103
50
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000051
maruel@chromium.org6b365dc2012-10-18 19:17:56 +000052# Types of action accepted by link_file().
# Types of action accepted by link_file().
HARDLINK = 1
SYMLINK = 2
COPY = 3

# Matches a full hex encoded sha-1 digest, in lower or upper case.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None

# The size of each chunk to read when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# The base directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# The name of the log to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# The name of the key to store the count of url attempts.
COUNT_KEY = 'UrlOpenAttempt'

# Default maximum number of attempts at opening a url before aborting.
URL_OPEN_MAX_ATTEMPTS = 30
# Default total duration, in seconds, to keep retrying for.
URL_OPEN_TIMEOUT = 6 * 60.

# Global (for now) map: server URL (http://example.com) -> HttpService instance.
# Used by get_http_service to cache HttpService instances.
_http_services = {}
_http_services_lock = threading.Lock()

# Used by get_flavor(): maps sys.platform values to flavor names.
FLAVOR_MAPPING = {
  'cygwin': 'win',
  'win32': 'win',
  'darwin': 'mac',
  'sunos5': 'solaris',
  'freebsd7': 'freebsd',
  'freebsd8': 'freebsd',
}
100
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000101
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be loaded or fails validation."""
105
106
class MappingError(OSError):
  """Raised when the tree of files could not be recreated."""
110
111
def get_flavor():
  """Returns the system default flavor.

  sys.platform is looked up in FLAVOR_MAPPING; any platform that is not listed
  there is reported as 'linux'. Copied from gyp/pylib/gyp/common.py.
  """
  platform = sys.platform
  if platform in FLAVOR_MAPPING:
    return FLAVOR_MAPPING[platform]
  return 'linux'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000115
116
def fix_default_encoding():
  """Forces utf8 solidly on all platforms.

  By default python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Returns False when the default encoding already was utf-8 and nothing was
  done, True once the default encoding and the locale categories were updated.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Reloading sys regenerates setdefaultencoding.
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  sys.setdefaultencoding('utf-8')  # pylint: disable=E1101

  category_names = [name for name in dir(locale) if name[0:3] == 'LC_']
  for name in category_names:
    category = getattr(locale, name)
    # Start from the user's default setting for this category; skip the
    # category when the locale module rejects it.
    try:
      locale.setlocale(category, '')
    except locale.Error:
      continue
    try:
      language = locale.getlocale(category)[0]
    except (TypeError, ValueError):
      continue
    if not language:
      continue
    try:
      locale.setlocale(category, (language, 'UTF-8'))
    except locale.Error:
      # Could not switch the category in-process; record the wanted value in
      # the environment instead.
      os.environ[name] = language + '.UTF-8'

  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    pass
  return True
155
156
class Unbuffered(object):
  """Wraps a file object to disable buffering on it.

  write() flushes the wrapped stream each time the written data contains a
  newline. Any attribute not defined here is looked up on the wrapped stream.
  """
  def __init__(self, stream):
    self.stream = stream

  def write(self, data):
    """Writes |data| to the stream, flushing if it contains a newline."""
    wrapped = self.stream
    wrapped.write(data)
    has_newline = '\n' in data
    if has_newline:
      wrapped.flush()

  def __getattr__(self, attr):
    # Only called for attributes not found on this wrapper; delegate them.
    return getattr(self.stream, attr)
169
170
def disable_buffering():
  """Makes this process and child processes stdout unbuffered.

  Does nothing when the PYTHONUNBUFFERED environment variable is already set
  to a non-empty value. Otherwise sys.stdout is wrapped with Unbuffered and
  PYTHONUNBUFFERED is set in os.environ.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...
  # so the whole object is replaced by a wrapper instead.
  sys.stdout = Unbuffered(sys.stdout)
  os.environ['PYTHONUNBUFFERED'] = 'x'
178
179
def os_link(source, link_name):
  """Creates a hard link named |link_name| to |source|.

  Adds support for os.link() on Windows, where kernel32's CreateHardLinkW is
  called through ctypes instead.

  Raises OSError (without details) when CreateHardLinkW reports a failure.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  created = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not created:
    raise OSError()
188
189
def readable_copy(outfile, infile):
  """Makes a copy of the file that is readable by everyone.

  |infile| is copied to |outfile|, then the read bit for user, group and
  others is added to whatever mode the copy ended up with.
  """
  shutil.copy(infile, outfile)
  everyone_can_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  os.chmod(outfile, os.stat(outfile).st_mode | everyone_can_read)
196
197
def link_file(outfile, infile, action):
  """Links a file. The type of link depends on |action|.

  Arguments:
    outfile: path of the link or copy to create. It must not already be a file.
    infile: path of the existing file to map.
    action: one of HARDLINK, SYMLINK or COPY.

  Raises ValueError for an unknown |action| and MappingError when |infile| is
  missing or |outfile| already exists.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    sizes = (outfile, os.stat(infile).st_size, os.stat(outfile).st_size)
    raise MappingError('%s already exist; insize:%d; outsize:%d' % sizes)

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # Either a hardlink was requested or this is a symlink request on Windows,
  # where symlinks are converted to hardlinks. Both fall back to a copy.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warn(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
224
225
def _set_write_bit(path, read_only):
  """Sets or resets the write bit on a file or directory.

  Arguments:
    path: path of the entry to modify. It is inspected with lstat(), so a
          symlink is looked at itself rather than its target.
    read_only: when True, the mode is reduced to at most owner read+execute
               (masked with 0500). When False, the owner write bit (0200) is
               added to the current mode.

  When os.lchmod() is not available, symlinks are left untouched since
  os.chmod() would modify the file the link points to.
  """
  original_mode = os.lstat(path).st_mode
  if read_only:
    mode = original_mode & 0o500
  else:
    mode = original_mode | 0o200

  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  # The symlink test must use the mode as returned by lstat(): masking with
  # 0500 above strips the file type bits, which would make every symlink look
  # like a regular file in the read_only case.
  if stat.S_ISLNK(original_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s' % path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
243
244
def make_writable(root, read_only):
  """Toggle the writable bit on a directory tree.

  Walks |root|, which must be an absolute path, from the top down and applies
  _set_write_bit() to every file and then every sub directory of each
  directory visited. |root| itself is not modified.
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    # Files first, then directories, for each directory visited.
    for entry in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, entry), read_only)
254
255
def rmtree(root):
  """Wrapper around shutil.rmtree() to retry automatically on Windows.

  The tree is first made writable. On Windows the removal is attempted up to
  three times, sleeping 2 then 4 seconds between attempts, since a subprocess
  outliving the test can still hold files open. If the last attempt fails too,
  the failure is logged and the function returns without raising; the tree is
  then left on disk.

  On other platforms shutil.rmtree() is called once and its exceptions
  propagate.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  max_attempts = 3
  for i in range(max_attempts):
    try:
      shutil.rmtree(root)
      return
    except WindowsError:  # pylint: disable=E0602
      if i + 1 == max_attempts:
        # No attempt follows, so sleeping would only delay the caller. Report
        # the failure instead of silently leaving the tree behind.
        logging.error(
            'Failed to delete %s after %d attempts.', root, max_attempts)
        return
      delay = (i+1)*2
      sys.stderr.write(
          'The test has subprocess outliving it. Sleep %d seconds.\n' % delay)
      time.sleep(delay)
271
272
def is_same_filesystem(path1, path2):
  """Returns True if both paths are on the same filesystem.

  This is required to enable the use of hardlinks.

  Both paths must be absolute. On Windows a drive letter mismatch is enough to
  return False; otherwise the device ids reported by os.stat() are compared.
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    drive1 = path1[0].lower()
    drive2 = path2[0].lower()
    if drive1 != drive2:
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
289
290
def get_free_space(path):
  """Returns the number of free bytes.

  On Windows the value is the one kernel32's GetDiskFreeSpaceExW stores in its
  last argument. On other OSes it is computed from os.statvfs() as
  f_bfree * f_frsize.
  """
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
301
302
def make_temp_dir(prefix, root_dir):
  """Returns a temporary directory on the same file system as root_dir.

  The default temporary directory is used when it is on the same file system
  as |root_dir|; otherwise the directory is created next to |root_dir|, i.e.
  inside its parent directory.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    # None lets tempfile use its default location.
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
309
310
def load_isolated(content):
  """Verifies the .isolated file is valid and returns the decoded json data.

  Arguments:
    content: the json encoded content of a .isolated file.

  Returns the decoded dict. The accepted top level keys are:
    - 'command': non-empty list of strings.
    - 'files': dict mapping a string to a dict of properties, where 'l' is a
      string (link), 'm' and 's' are integers and 'h' is a sha-1. 'h' and 'l'
      are mutually exclusive.
    - 'includes': non-empty list of sha-1.
    - 'read_only': bool.
    - 'relative_cwd': string.
    - 'os': must be equal to get_flavor().

  Raises ConfigError if the content is not valid json or if any key or value
  is not of the expected form.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  # json decodes integers that do not fit in an int as long, e.g. the size of
  # a large file on a platform where int is 32 bits. Both are valid integers.
  integer_types = (int, long)  # pylint: disable=E0602

  def is_sha1(value):
    # The type is checked first since matching a regexp against a non-string
    # raises TypeError, which callers do not expect from this function.
    return isinstance(value, basestring) and bool(RE_IS_SHA1.match(value))

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, integer_types):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_sha1(subsubvalue):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, integer_types):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_sha1(subvalue):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
388
389
def fix_python_path(cmd):
  """Returns the fixed command line to call the right python executable.

  |cmd| is not modified; a copy is returned where a leading 'python' is
  replaced by sys.executable, or where sys.executable is prepended when the
  first item ends with '.py'.
  """
  fixed = cmd[:]
  program = fixed[0]
  if program == 'python':
    fixed[0] = sys.executable
  elif program.endswith('.py'):
    fixed.insert(0, sys.executable)
  return fixed
398
399
def url_open(url, **kwargs):
  """Attempts to open the given url multiple times.

  The url is split into its server and path parts; the request is then sent
  through the HttpService instance returned by get_http_service() for that
  server, with |kwargs| passed as-is to its request() method.

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns a file-like object, where the response may be read from, or None
  if it was unable to connect.
  """
  host, path = split_server_request_url(url)
  return get_http_service(host).request(path, **kwargs)
415
416
def split_server_request_url(url):
  """Splits the url into scheme+netloc and path+params+query+fragment.

  Returns a tuple (urlhost, urlpath), e.g. 'http://example.com/a?b=1' gives
  ('http://example.com', '/a?b=1').
  """
  parsed = urlparse.urlparse(url)
  urlhost = '%s://%s' % (parsed.scheme, parsed.netloc)
  # Rebuild the url with an empty scheme and netloc to keep only the rest.
  urlpath = urlparse.urlunparse(
      ['', '', parsed.path, parsed.params, parsed.query, parsed.fragment])
  return urlhost, urlpath
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000423
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000424
def get_http_service(urlhost):
  """Returns existing or creates new instance of HttpService that can send
  requests to given base urlhost.

  Instances are cached in _http_services, keyed by the lower cased |urlhost|
  without trailing '/'. New instances are AppEngineService objects.
  """
  # Ensure consistency: the same server must always map to the same key.
  key = str(urlhost).lower().rstrip('/')
  with _http_services_lock:
    cached = _http_services.get(key)
    if cached:
      return cached
    created = AppEngineService(key)
    _http_services[key] = created
    return created
437
438
class HttpService(object):
  """Base class for a class that provides an API to HTTP based service:
    - Provides 'request' method.
    - Supports automatic request retries.
    - Supports persistent cookies.
    - Thread safe.
  """

  # File to use to store all auth cookies.
  COOKIE_FILE = os.path.join('~', '.isolated_cookies')

  # CookieJar reused by all services + lock that protects its instantiation.
  _cookie_jar = None
  _cookie_jar_lock = threading.Lock()

  def __init__(self, urlhost):
    """Arguments:
      urlhost: scheme and host of the server, e.g. 'http://example.com'.
    """
    self.urlhost = urlhost
    self.cookie_jar = self.load_cookie_jar()
    self.opener = self.create_url_opener()

  def authenticate(self):  # pylint: disable=R0201
    """Called when HTTP server asks client to authenticate.
    Can be implemented in subclasses.

    Returns True if the authentication succeeded. This base implementation
    never authenticates and always returns False.
    """
    return False

  @staticmethod
  def load_cookie_jar():
    """Returns global CoookieJar object that stores cookies in the file.

    The jar is created and loaded from COOKIE_FILE on the first call; the same
    instance is returned afterwards.
    """
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        return HttpService._cookie_jar
      jar = ThreadSafeCookieJar(os.path.expanduser(HttpService.COOKIE_FILE))
      jar.load()
      HttpService._cookie_jar = jar
      return jar

  @staticmethod
  def save_cookie_jar():
    """Called when cookie jar needs to be flushed to disk."""
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        HttpService._cookie_jar.save()

  def create_url_opener(self):  # pylint: disable=R0201
    """Returns OpenerDirector that will be used when sending requests.
    Can be reimplemented in subclasses."""
    return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))

  def request(self, urlpath, data=None, content_type=None, **kwargs):
    """Attempts to open the given url multiple times.

    |urlpath| is relative to the server root, i.e. '/some/request?param=1'.

    |data| can be either:
      -None for a GET request
      -str for pre-encoded data
      -list for data to be encoded
      -dict for data to be encoded (COUNT_KEY will be added in this case)

    |content_type| is the value of the Content-Type header to send along
    |data|; it can't be used on a GET request.

    |kwargs| are passed as-is to _retry_loop(), see its arguments.

    Returns a file-like object, where the response may be read from, or None
    if it was unable to connect.
    """
    assert urlpath and urlpath[0] == '/'

    if isinstance(data, dict) and COUNT_KEY in data:
      logging.error('%s already existed in the data passed into UlrOpen. It '
                    'would be overwritten. Aborting UrlOpen', COUNT_KEY)
      return None

    method = 'GET' if data is None else 'POST'
    assert not ((method != 'POST') and content_type), (
        'Can\'t use content_type on GET')

    def make_request(extra):
      """Returns a urllib2.Request instance for this specific retry.

      |extra| is a dict of additional parameters, added to the payload when it
      is encoded here, or to the query part of the url otherwise.
      """
      if isinstance(data, str) or data is None:
        payload = data
      else:
        if isinstance(data, dict):
          payload = data.items()
        else:
          payload = data[:]
        payload.extend(extra.iteritems())
        payload = urllib.urlencode(payload)
      new_url = urlparse.urljoin(self.urlhost, urlpath[1:])
      if isinstance(data, str) or data is None:
        # In these cases, add the extra parameter to the query part of the url.
        url_parts = list(urlparse.urlparse(new_url))
        # Append the query parameter.
        if url_parts[4] and extra:
          url_parts[4] += '&'
        url_parts[4] += urllib.urlencode(extra)
        new_url = urlparse.urlunparse(url_parts)
      request = urllib2.Request(new_url, data=payload)
      if payload is not None:
        if content_type:
          request.add_header('Content-Type', content_type)
        request.add_header('Content-Length', len(payload))
      return request

    return self._retry_loop(make_request, **kwargs)

  def _retry_loop(
      self,
      make_request,
      max_attempts=URL_OPEN_MAX_ATTEMPTS,
      retry_404=False,
      retry_50x=True,
      timeout=URL_OPEN_TIMEOUT):
    """Runs internal request-retry loop.

    - Optionally retries HTTP 404 and 50x.
    - Retries up to |max_attempts| times. If None or 0, there's no limit in the
      number of retries.
    - Retries up to |timeout| duration in seconds. If None or 0, there's no
      limit in the time taken to do retries.
    - If both |max_attempts| and |timeout| are None or 0, this functions retries
      indefinitely.

    |make_request| is called with a dict of extra parameters (the attempt
    number, except for the first attempt) and returns the urllib2.Request to
    send.

    Returns the response on success, None otherwise.
    """
    authenticated = False
    last_error = None
    # Kept outside of the loop for the final error message, which can be
    # reached without any request having been sent.
    last_url = None
    attempts_made = 0
    start = self._now()
    for attempt in itertools.count():
      if max_attempts and attempt >= max_attempts:
        # Too many attempts.
        break
      if timeout and (self._now() - start) >= timeout:
        # Retried for too long.
        break
      extra = {COUNT_KEY: attempt} if attempt else {}
      request = make_request(extra)
      last_url = request.get_full_url()
      attempts_made = attempt + 1
      try:
        url_response = self._url_open(request)
        logging.debug('url_open(%s) succeeded', last_url)
        return url_response
      except urllib2.HTTPError as e:
        # Unauthorized. Ask to authenticate and then try again.
        if e.code in (401, 403):
          # The response body can only be read once, so format the error a
          # single time and reuse it for both messages below.
          details = self._format_exception(e, verbose=True)
          # Try to authenticate only once. If it doesn't help, then server does
          # not support app engine authentication.
          logging.error(
              'Authentication is required for %s on attempt %d.\n%s',
              last_url, attempt, details)
          if not authenticated and self.authenticate():
            authenticated = True
            # Do not sleep.
            continue
          # If authentication failed, return.
          logging.error('Unable to authenticate to %s.\n%s', last_url, details)
          return None

        if ((e.code < 500 and not (retry_404 and e.code == 404)) or
            (e.code >= 500 and not retry_50x)):
          # This HTTPError means we reached the server and there was a problem
          # with the request, so don't retry.
          logging.error(
              'Able to connect to %s but an exception was thrown.\n%s',
              last_url, self._format_exception(e, verbose=True))
          return None

        # The HTTPError was due to a server error, so retry the attempt.
        logging.warning('Able to connect to %s on attempt %d.\n%s',
                        last_url, attempt, self._format_exception(e))
        last_error = e

      except (urllib2.URLError, httplib.HTTPException) as e:
        logging.warning('Unable to open url %s on attempt %d.\n%s',
                        last_url, attempt, self._format_exception(e))
        last_error = e

      # Only sleep if we are going to try again: either the number of attempts
      # is not limited, or this was not the last allowed attempt.
      if not max_attempts or attempt + 1 < max_attempts:
        remaining = None
        if timeout:
          remaining = timeout - (self._now() - start)
          if remaining <= 0:
            break
        self.sleep_before_retry(attempt, remaining)

    logging.error('Unable to open given url, %s, after %d attempts.\n%s',
                  last_url, attempts_made,
                  self._format_exception(last_error, verbose=True))
    return None

  def _url_open(self, request):
    """Low level method to execute urllib2.Request's.

    To be mocked in tests.
    """
    return self.opener.open(request)

  @staticmethod
  def _now():
    """Returns the current time in seconds. To be mocked in tests."""
    return time.time()

  @staticmethod
  def calculate_sleep_before_retry(attempt, max_duration):
    """Returns the duration in seconds to sleep after failed |attempt|.

    The duration grows exponentially with |attempt| plus a random offset, and
    is capped to 10 seconds and to |max_duration| when it is set.
    """
    # Maximum sleeping time. We're hammering a cloud-distributed service, it'll
    # survive.
    MAX_SLEEP = 10.
    # random.random() returns [0.0, 1.0). Starts with relatively short waiting
    # time by starting with 1.5/2+1.5^-1 median offset.
    duration = (random.random() * 1.5) + math.pow(1.5, (attempt - 1))
    assert duration > 0.1
    duration = min(MAX_SLEEP, duration)
    if max_duration:
      duration = min(max_duration, duration)
    return duration

  @classmethod
  def sleep_before_retry(cls, attempt, max_duration):
    """Sleeps for some amount of time when retrying the request.

    To be mocked in tests.
    """
    time.sleep(cls.calculate_sleep_before_retry(attempt, max_duration))

  @staticmethod
  def _format_exception(exc, verbose=False):
    """Given an instance of some exception raised by urlopen returns human
    readable piece of text with detailed information about the error.

    When |verbose| is set and |exc| is an HTTPError, the response headers not
    starting with 'x-' and the response body are included. This reads the
    body, so it can only be retrieved once per exception.
    """
    out = ['Exception: %s' % (exc,)]
    if verbose:
      if isinstance(exc, urllib2.HTTPError):
        out.append('-' * 10)
        if exc.hdrs:
          for header, value in exc.hdrs.items():
            if not header.startswith('x-'):
              out.append('%s: %s' % (header.capitalize(), value))
          out.append('')
        out.append(exc.read() or '<empty body>')
        out.append('-' * 10)
    return '\n'.join(out)
681
maruel@chromium.orgef333122013-03-12 20:36:40 +0000682
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000683class AppEngineService(HttpService):
684 """This class implements authentication support for
685 an app engine based services.
maruel@chromium.orgef333122013-03-12 20:36:40 +0000686 """
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000687
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000688 # This lock ensures that user won't be confused with multiple concurrent
689 # login prompts.
690 _auth_lock = threading.Lock()
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000691
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000692 def __init__(self, urlhost, email=None, password=None):
693 super(AppEngineService, self).__init__(urlhost)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000694 self.email = email
695 self.password = password
696 self._keyring = None
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000697
def authenticate(self):
  """Authenticates in the app engine application.

  Requires the optional 'upload' module. Cookies obtained while authenticating
  go to self.cookie_jar and are persisted with self.save_cookie_jar().

  Returns False right away when 'upload' is missing, otherwise the
  'authenticated' attribute of the RPC server (True on success).
  """
  if not upload:
    logging.error(
        "'upload' module is missing, app engine authentication is disabled.")
    return False

  # Bound to locals so the nested class can reach them through its closure;
  # inside its methods |self| is the RPC server, not this service.
  jar = self.cookie_jar
  persist_cookies = self.save_cookie_jar

  # RPC server that uses AuthenticationSupport's cookie jar.
  class AuthServer(upload.AbstractRpcServer):
    def _GetOpener(self):
      # Authentication code needs to know about 302 response, so the
      # OpenerDirector is built without HTTPRedirectHandler.
      handlers = (
          urllib2.ProxyHandler(),
          urllib2.UnknownHandler(),
          urllib2.HTTPHandler(),
          urllib2.HTTPDefaultErrorHandler(),
          urllib2.HTTPSHandler(),
          urllib2.HTTPErrorProcessor(),
          urllib2.HTTPCookieProcessor(jar),
      )
      director = urllib2.OpenerDirector()
      for handler in handlers:
        director.add_handler(handler)
      return director

    def PerformAuthentication(self):
      self._Authenticate()
      persist_cookies()
      return self.authenticated

  # Only one authentication at a time across all the instances.
  with AppEngineService._auth_lock:
    server = AuthServer(self.urlhost, self.get_credentials)
    return server.PerformAuthentication()
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000729
def get_credentials(self):
  """Returns the tuple (email, password) to authenticate with.

  Called during the authentication process, possibly multiple times if
  authentication fails. The email and password given to the constructor are
  used when both are set; otherwise the credentials are requested from an
  upload.KeyringCreds instance that is created on first use and then reused.
  """
  # 'authenticate' calls this only if 'upload' is present. Ensure other callers
  # (if any) fail non-cryptically if 'upload' is missing.
  assert upload, "'upload' module is required for this to work"
  if not (self.email and self.password):
    if not self._keyring:
      self._keyring = upload.KeyringCreds(
          self.urlhost, self.urlhost, self.email)
    return self._keyring.GetUserCredentials()
  return (self.email, self.password)
745
746
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save.

  Both operations are serialized with the jar's own |_cookies_lock| and are
  best effort: I/O failures are logged instead of being raised.
  """

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists.

    If the file does not exist, an empty one is created. In both cases its mode
    is then forced to 0600 so the cookies stay private to the user.

    |filename| defaults to self.filename. |ignore_discard| and |ignore_expires|
    are passed through to MozillaCookieJar.load().
    """
    filename = filename or self.filename
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(
              self, filename, ignore_discard, ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError) as e:
          # Best effort: an unreadable or malformed file (e.g. the empty file
          # created below by a previous call) is the same as having no cookie.
          logging.debug('Failed to load cookies from %s: %s', filename, e)
      else:
        try:
          fd = os.open(filename, os.O_CREAT, 0o600)
          os.close(fd)
        except OSError:
          logging.error('Failed to create %s', filename)
      try:
        os.chmod(filename, 0o600)
      except OSError:
        logging.error('Failed to fix mode for %s', filename)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it.

    |filename| defaults to self.filename. A failure to write the file is logged
    and otherwise ignored.
    """
    target = filename or self.filename
    logging.debug('Saving cookies to %s', target)
    with self._cookies_lock:
      try:
        cookielib.MozillaCookieJar.save(
            self, filename, ignore_discard, ignore_expires)
      except EnvironmentError:
        # The file is opened with open(), which raises IOError and not OSError
        # on Python 2; EnvironmentError is the base class of both.
        logging.error('Failed to save %s', target)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000783
784
class ThreadPool(object):
  """Implements a multithreaded worker pool oriented for mapping jobs with
  thread-local result storage.

  Tasks are queued with add_task() and executed by daemon worker threads that
  are started on demand, up to |max_threads|. Values returned (or yielded) by
  the tasks and exceptions they raise are collected and handed back by join()
  and get_one_result().

  Can be used in a 'with' statement, in which case close() is called on exit.

  Arguments:
  - initial_threads: Number of threads to start immediately. Can be 0 if it is
                     uncertain that threads will be needed.
  - max_threads: Maximum number of threads that will be started when all the
                 threads are busy working. Often the number of CPU cores.
  - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
                queue. A non-zero value may make add_task() blocking.
  """
  # Queue implementation holding the pending tasks.
  QUEUE_CLASS = Queue.PriorityQueue

  def __init__(self, initial_threads, max_threads, queue_size):
    logging.debug(
        'ThreadPool(%d, %d, %d)', initial_threads, max_threads, queue_size)
    assert initial_threads <= max_threads
    # Update this check once 256 cores CPU are common.
    assert max_threads <= 256

    # Pending tasks, as tuples (priority, index, func, args, kwargs), or None
    # to ask a worker to exit.
    self.tasks = self.QUEUE_CLASS(queue_size)
    self._max_threads = max_threads

    # Mutables.
    # Total number of tasks ever added; also used as the index of each task.
    self._num_of_added_tasks_lock = threading.Lock()
    self._num_of_added_tasks = 0
    # Guards both |_outputs| and |_exceptions| and is notified whenever an item
    # is appended to either of them.
    self._outputs_exceptions_cond = threading.Condition()
    self._outputs = []
    # List of sys.exc_info() tuples of the tasks that raised.
    self._exceptions = []
    # Number of threads in wait state.
    self._ready_lock = threading.Lock()
    self._ready = 0
    self._workers_lock = threading.Lock()
    self._workers = []
    for _ in range(initial_threads):
      self._add_worker()

  def _add_worker(self):
    """Adds one worker thread if there isn't too many. Thread-safe.

    Returns False if the maximum number of threads was already reached, None
    once a new daemon thread was started.
    """
    # Better to take the lock two times than hold it for too long.
    with self._workers_lock:
      if len(self._workers) >= self._max_threads:
        return False
    worker = threading.Thread(target=self._run)
    with self._workers_lock:
      # Check again, another thread may have added a worker in the meantime.
      if len(self._workers) >= self._max_threads:
        return False
      self._workers.append(worker)
    worker.daemon = True
    worker.start()

  def add_task(self, priority, func, *args, **kwargs):
    """Adds a task, a function to be executed by a worker.

    |priority| can adjust the priority of the task versus others. Lower priority
    takes precedence.

    |func| can either return a return value to be added to the output list or
    be a generator which can emit multiple values.

    |args| and |kwargs| are passed to |func| when it is called.

    A new worker thread is started (if allowed by max_threads) when no worker
    was waiting for a task at the time of the call.

    Returns the index of the item added, e.g. the total number of enqueued items
    up to now.
    """
    assert isinstance(priority, int)
    assert callable(func)
    with self._ready_lock:
      start_new_worker = not self._ready
    with self._num_of_added_tasks_lock:
      self._num_of_added_tasks += 1
      index = self._num_of_added_tasks
    # |index| is unique, so two queued tuples are always ordered by their first
    # two items.
    self.tasks.put((priority, index, func, args, kwargs))
    if start_new_worker:
      self._add_worker()
    return index

  def _run(self):
    """Worker thread loop. Runs until a None task is queued."""
    while True:
      try:
        # The thread counts as 'ready' only while it waits for a task.
        with self._ready_lock:
          self._ready += 1
        task = self.tasks.get()
      finally:
        with self._ready_lock:
          self._ready -= 1
      try:
        if task is None:
          # We're done.
          return
        _priority, _index, func, args, kwargs = task
        if inspect.isgeneratorfunction(func):
          for out in func(*args, **kwargs):
            self._output_append(out)
        else:
          out = func(*args, **kwargs)
          self._output_append(out)
      except Exception as e:
        logging.warning('Caught exception: %s', e)
        exc_info = sys.exc_info()
        logging.info(''.join(traceback.format_tb(exc_info[2])))
        # Keep the exception so join() or get_one_result() can re-raise it in
        # the calling thread.
        self._outputs_exceptions_cond.acquire()
        try:
          self._exceptions.append(exc_info)
          self._outputs_exceptions_cond.notifyAll()
        finally:
          self._outputs_exceptions_cond.release()
      finally:
        # Runs for every dequeued item, including the None sentinel, so that
        # self.tasks.join() can return.
        try:
          self.tasks.task_done()
        except Exception as e:
          # We need to catch and log this error here because this is the root
          # function for the thread, nothing higher will catch the error.
          logging.exception('Caught exception while marking task as done: %s',
                            e)

  def _output_append(self, out):
    """Stores one task result and wakes up the threads waiting for one.

    None is not stored.
    """
    if out is not None:
      self._outputs_exceptions_cond.acquire()
      try:
        self._outputs.append(out)
        self._outputs_exceptions_cond.notifyAll()
      finally:
        self._outputs_exceptions_cond.release()

  def join(self):
    """Extracts all the results from each threads unordered.

    Blocks until every queued task was processed. If at least one task raised,
    the oldest pending exception is re-raised with its original traceback and
    the results are kept for a later call; otherwise the list of results
    accumulated so far is returned and reset.

    Call repeatedly to extract all the exceptions if desired.

    Note: will wait for all work items to be done before returning an exception.
    To get an exception early, use get_one_result().
    """
    # TODO(maruel): Stop waiting as soon as an exception is caught.
    self.tasks.join()
    self._outputs_exceptions_cond.acquire()
    try:
      if self._exceptions:
        e = self._exceptions.pop(0)
        raise e[0], e[1], e[2]
      out = self._outputs
      self._outputs = []
    finally:
      self._outputs_exceptions_cond.release()
    return out

  def get_one_result(self):
    """Returns the next item that was generated or raises an exception if one
    occured.

    Pending exceptions are raised before pending results are returned. Blocks
    until either is available.

    Warning: this function will hang if there is no work item left. Use join
    instead.
    """
    self._outputs_exceptions_cond.acquire()
    try:
      while True:
        if self._exceptions:
          e = self._exceptions.pop(0)
          raise e[0], e[1], e[2]
        if self._outputs:
          return self._outputs.pop(0)
        # Releases the condition while waiting for a worker to notify it.
        self._outputs_exceptions_cond.wait()
    finally:
      self._outputs_exceptions_cond.release()

  def close(self):
    """Closes all the threads.

    Queues one stop request per worker thread, then waits for each thread to
    exit.
    """
    for _ in range(len(self._workers)):
      # Enqueueing None causes the worker to stop.
      self.tasks.put(None)
    for t in self._workers:
      t.join()

  def __enter__(self):
    """Enables 'with' statement."""
    return self

  def __exit__(self, _exc_type, _exc_value, _traceback):
    """Enables 'with' statement."""
    self.close()
965
966
def valid_file(filepath, size):
  """Determines if the given file appears valid.

  Currently it just checks the file's size: |filepath| is valid when its size
  on disk equals |size|, or when |size| is UNKNOWN_FILE_SIZE, in which case the
  file is not even looked at. A mismatch is logged as a warning.

  Returns True if the file is considered valid.
  """
  if size == UNKNOWN_FILE_SIZE:
    # Nothing to compare against.
    return True
  size_on_disk = os.stat(filepath).st_size
  is_valid = size_on_disk == size
  if not is_valid:
    logging.warning(
        'Found invalid item %s; %d != %d',
        os.path.basename(filepath), size_on_disk, size)
  return is_valid
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000979
980
class Profiler(object):
  """Context manager logging how long the enclosed section took to run.

  |name| identifies the section in the log line emitted on exit.
  """

  def __init__(self, name):
    # Only set once the context is entered.
    self.start_time = None
    self.name = name

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
994
995
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priority of items is equals, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The low 8 bits are reserved to count the retries, see
  # _task_executer().
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    """|destination_root| is either an http(s) url or a directory path; see
    get_file_handler().
    """
    # Function to fetch a remote object or upload to a remote location..
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = ThreadPool(self.INITIAL_WORKERS, self.MAX_WORKERS, 0)

  def join(self):
    """Blocks until the queue is empty."""
    return self._pool.join()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first. It must be one of the public
    priorities, i.e. have none of the internal retry bits set.

    |obj| is the name of the item, |dest| the local path to write it to and
    |size| its expected size, or None to skip the size verification.

    Thread-safe.
    """
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Queues the task without checking the internal bits of |priority|."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns the name of the next processed item, see
    ThreadPool.get_one_result().
    """
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success. On IOError, the item is queued again with a
    slightly lower precedence and None is returned, until RETRIES retries were
    done, at which point the exception is propagated.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Retry a few times, lowering the priority. The number of retries done so
      # far is stored in the low bits of the priority.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a object to retrieve objects from a remote.

    The returned function takes (item, dest). If |file_or_url| is an http(s)
    url, it downloads and zlib-decompresses |file_or_url| + item into |dest|,
    raising IOError on failure. Otherwise |file_or_url| is used as a directory
    and the item is copied from it to |dest|.
    """
    if re.match(r'^https?://.+$', file_or_url):
      def download_file(item, dest):
        # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
        # easy.
        zipped_source = file_or_url + item
        # Last chunk read and number of compressed bytes read so far; both are
        # reported if the decompression fails.
        chunk = b''
        size = 0
        try:
          logging.debug('download_file(%s)', zipped_source)

          # Because the app engine DB is only eventually consistent, retry
          # 404 errors because the file might just not be visible yet (even
          # though it has been uploaded).
          connection = url_open(zipped_source, retry_404=True)
          if not connection:
            raise IOError('Unable to open connection to %s' % zipped_source)
          decompressor = zlib.decompressobj()
          with open(dest, 'wb') as f:
            while True:
              chunk = connection.read(ZIPPED_FILE_CHUNK)
              if not chunk:
                break
              size += len(chunk)
              f.write(decompressor.decompress(chunk))
            # flush() returns the remaining uncompressed output, if any; it
            # belongs to the file and must not be dropped.
            f.write(decompressor.flush())
        except IOError:
          logging.error('Encountered an exception with (%s, %s)', item, dest)
          raise
        except httplib.HTTPException as e:
          raise IOError('Encountered an HTTPException.\n%s' % e)
        except zlib.error as e:
          # Log the first bytes to see if it's uncompressed data.
          logging.warning('%r', chunk[:512])
          raise IOError(
              'Problem unzipping data for item %s. Got %d bytes.\n%s' %
              (item, size, e))

      return download_file

    def copy_file(item, dest):
      source = os.path.join(file_or_url, item)
      if source == dest:
        logging.info('Source and destination are the same, no action required')
        return
      logging.debug('copy_file(%s, %s)', source, dest)
      shutil.copy(source, dest)
    return copy_file
1115
1116
class CachePolicies(object):
  def __init__(self, max_cache_size, min_free_space, max_items):
    """Holds the limits used to decide when the cache must be trimmed.

    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
1131
1132
class NoCache(object):
  """This class is intended to be usable everywhere the Cache class is.
  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    """|target_directory| is where the items are written to, |remote| is the
    object used to fetch them.
    """
    self.remote = remote
    self.target_directory = target_directory

  def retrieve(self, priority, item, size):
    """Get the request file.

    Queues the download of |item| and waits for one result from the remote.
    """
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Download the first item of the given list if it is missing.

    |items| is a dict; only its first key is considered. Returns that key.
    """
    item = items.iterkeys().next()
    destination = self.path(item)
    if os.path.exists(destination):
      return item

    self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
    downloaded = self.remote.get_one_result()
    assert downloaded == item
    return item

  def path(self, item):
    """Returns the path of |item| inside the target directory."""
    return os.path.join(self.target_directory, item)
1161
1162
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.

  The cached files live directly in the cache directory, next to the state
  file. Can be used in a 'with' statement, in which case the cache is trimmed
  and statistics are logged on exit.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """Loads the saved state and reconciles it with the content of |cache_dir|.

    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    self._state_need_to_be_saved = False
    # A lookup map to speed up searching.
    self._lookup = {}
    self._lookup_is_stale = True

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    with Profiler('Setup'):
      if not os.path.isdir(self.cache_dir):
        os.makedirs(self.cache_dir)
      if os.path.isfile(self.state_file):
        try:
          # Close the file deterministically instead of relying on garbage
          # collection.
          with open(self.state_file, 'r') as f:
            self.state = json.load(f)
        except (IOError, ValueError) as e:
          # Too bad. The file will be overwritten and the cache cleared.
          logging.error(
              'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
          self._state_need_to_be_saved = True
        if (not isinstance(self.state, list) or
            not all(
              isinstance(i, (list, tuple)) and len(i) == 2
              for i in self.state)):
          # Discard.
          self._state_need_to_be_saved = True
          self.state = []

      # Ensure that all files listed in the state still exist and add new ones.
      previous = set(filename for filename, _ in self.state)
      if len(previous) != len(self.state):
        logging.warn('Cache state is corrupted, found duplicate files')
        self._state_need_to_be_saved = True
        self.state = []
        # The state was discarded, so no file is known anymore. Without this,
        # the files on disk would be skipped below as if they were still
        # tracked, and would be left unaccounted for.
        previous = set()

      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          # Known and still present. What remains in |previous| after the loop
          # are the entries whose file disappeared.
          previous.remove(filename)
          continue
        # An untracked file.
        if not RE_IS_SHA1.match(filename):
          logging.warn('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
          continue
        # Insert as the oldest file. It will be deleted eventually if not
        # accessed.
        self._add(filename, False)
        logging.warn('Add unknown file %s to cache', filename)
        added += 1

      if added:
        logging.warn('Added back %d unknown files', added)
      if previous:
        logging.warn('Removed %d lost files', len(previous))
        # Set explicitly in case self._add() wasn't called.
        self._state_need_to_be_saved = True
        # Filter out entries that were not found while keeping the previous
        # order.
        self.state = [
          (filename, size) for filename, size in self.state
          if filename not in previous
        ]
      self.trim()

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Trims the cache one last time and logs usage statistics."""
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%5d (%8dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info(' %8dkb free', self._free_disk / 1024)

  def remove_file_at_index(self, index):
    """Removes the file at the given index.

    The entry is dropped from the state even if the file itself can't be
    deleted; that failure is only logged.
    """
    try:
      self._state_need_to_be_saved = True
      filename, size = self.state.pop(index)
      # If the lookup was already stale, its possible the filename was not
      # present yet.
      self._lookup_is_stale = True
      self._lookup.pop(filename, None)
      self._removed.append(size)
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)

  def remove_lru_file(self):
    """Removes the least recently used file."""
    self.remove_file_at_index(0)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists.

    Removes the oldest files until every policy that is set (non-zero) is
    respected, then saves the state.
    """
    # Ensure maximum cache size.
    if self.policies.max_cache_size and self.state:
      while sum(i[1] for i in self.state) > self.policies.max_cache_size:
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)

    self.save()

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verifiy that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.

    The fetch is only queued on the remote here; use wait_for() to wait for it
    to complete.
    """
    assert not '/' in item
    path = self.path(item)
    self._update_lookup()
    index = self._lookup.get(item)

    if index is not None:
      if not valid_file(self.path(item), size):
        self.remove_file_at_index(index)
        index = None
      else:
        assert index < len(self.state)
        # Was already in cache. Update it's LRU value by putting it at the end.
        self._state_need_to_be_saved = True
        self._lookup_is_stale = True
        self.state.append(self.state.pop(index))

    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.add_item(priority, item, path, size)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache.

    Does nothing if |obj| is already tracked, otherwise |filepath| is linked
    into the cache under the name |obj|.
    """
    self._update_lookup()
    if not obj in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering.

    Nothing is written if the state did not change since the last save.
    """
    if self._state_need_to_be_saved:
      # Text mode, like the reader in __init__; the compact json written here
      # contains no raw newline. The 'with' guarantees the data is flushed and
      # the file closed before the flag is reset.
      with open(self.state_file, 'w') as f:
        json.dump(self.state, f, separators=(',',':'))
      self._state_need_to_be_saved = False

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Every item fetched in the meantime is added to the cache.

    Returns the first item retrieved.
    """
    # Flush items already present.
    self._update_lookup()
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_one_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    The size recorded is the current size of the file on disk. The item becomes
    the most recent one if |at_end| is True, the oldest one otherwise.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    self._state_need_to_be_saved = True
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self._lookup_is_stale = True
      self.state.insert(0, (item, size))

  def _update_lookup(self):
    """Rebuilds the filename -> index map if it is stale."""
    if self._lookup_is_stale:
      self._lookup = dict(
          (filename, index) for index, (filename, _) in enumerate(self.state))
      self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001406
1407
class IsolatedFile(object):
  """In-memory representation of one parsed .isolated file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing
    # a .isolated file in the hash table, is important, as the later ones are
    # not processed until the first ones are retrieved and read.
    self.can_fetch = False

    # Raw data, as returned by load_isolated().
    self.data = {}
    # One IsolatedFile instance per entry of the 'includes' key.
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Validates |content| as a .isolated file and fills this object with the
    resulting json data.

    May only be called once per instance.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included = self.data.get('includes', [])
    self.children = [IsolatedFile(sha1) for sha1 in included]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Merges the files of this .isolated file that are missing from |files|.

    Each newly added entry that has a hash is preemptively requested from
    |cache|.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overridden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s' % filepath)
      cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1462
1463
class Settings(object):
  """Results of a completely parsed .isolated file."""

  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Arguments:
      cache: object providing retrieve(), wait_for() and path().
      root_isolated_hash: sha-1 of the root .isolated file.

    Raises:
      ConfigError if a .isolated file is included more than once in the tree,
      which includes recursive includes.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Keeps every hash ever requested to refuse recursive includes. Each newly
    # requested hash must be recorded here, otherwise a cycle that does not go
    # through the root would be fetched and parsed forever.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node providing a value wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file deterministically instead of relying on the garbage
      # collector.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1545
1546
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files.

  Arguments:
    base_directory: existing directory under which the tree is created.
    files: sized iterable of file paths, relative to |base_directory|.

  Directories that already exist are left untouched, so the function can be
  used on a destination that was already partially populated.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Creates the tree of directories to create.
  directories = set(os.path.dirname(f) for f in files)
  for item in list(directories):
    while item:
      directories.add(item)
      item = os.path.dirname(item)
  # Sorting guarantees a parent is created before any of its children.
  for d in sorted(directories):
    if not d:
      continue
    path = os.path.join(base_directory, d)
    # os.mkdir() raises when the directory exists; skip it in that case.
    if not os.path.isdir(path):
      os.mkdir(path)
1559
1560
def create_links(base_directory, files):
  """Creates the symlinks described by |files| under |base_directory|.

  |files| yields (filepath, properties) pairs. Only entries with an 'l'
  property are processed; its value is the link target. When an 'm' property
  is present and os.lchmod exists, it is applied to the created link.
  """
  on_windows = sys.platform == 'win32'
  lchmod = getattr(os, 'lchmod', None)
  for filepath, properties in files:
    if 'l' not in properties:
      continue
    if on_windows:
      # TODO(maruel): Create junctions or empty text files similar to what
      # cygwin do?
      logging.warning('Ignoring symlink %s', filepath)
      continue
    link_path = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'link' property should
    # never be specified for windows .isolated file.
    os.symlink(properties['l'], link_path)  # pylint: disable=E1101
    if 'm' in properties and lchmod:
      lchmod(link_path, properties['m'])
1579
1580
def setup_commands(base_directory, cwd, cmd):
  """Computes the working directory and the command needed to run the test.

  |cwd| must be relative; it is resolved against |base_directory| and created
  if missing. The first element of |cmd| is modified in place.

  Returns a (working directory, command) tuple.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  working_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(working_dir):
    os.makedirs(working_dir)

  # Ensure paths are correctly separated on windows.
  cmd[0] = cmd[0].replace('/', os.path.sep)
  return working_dir, fix_python_path(cmd)
1595
1596
def generate_remaining_files(files):
  """Groups the files that still have to be downloaded by content hash.

  |files| yields (filepath, properties) pairs; entries without an 'h' property
  are ignored.

  Returns a dict mapping each hash to the list of (filepath, properties) pairs
  sharing it, in iteration order.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))
  return by_hash
1605
1606
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies to the given directory.

  |target_directory| is created when missing. Once everything is in place, the
  command to run and its working directory are printed.

  Returns 1 if the .isolated tree defines no command, 0 otherwise.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds') as _prof:
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    print >> sys.stderr, 'No command to run'
    return 1

  with Profiler('GetRest') as _prof:
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      digest = no_cache.wait_for(remaining)
      destinations = remaining.pop(digest)
      last_index = len(destinations) - 1

      for index, (filepath, properties) in enumerate(destinations):
        outfile = os.path.join(target_directory, filepath)
        logging.info(no_cache.path(digest))

        # The same content may be mapped at several paths: copy it for all
        # but the last destination, which takes over the downloaded file.
        if index == last_index:
          os.rename(no_cache.path(digest), outfile)
        else:
          shutil.copyfile(no_cache.path(digest), outfile)

        if sys.platform != 'win32' and 'm' in properties:
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in
          # that case.
          os.chmod(outfile, properties['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...' % len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1664
1665
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
    isolated_hash: sha-1 of the .isolated file to run. Anything that does not
        match RE_IS_SHA1 is treated as the path of a local .isolated file,
        which is added to the cache under its own sha-1.
    cache_dir: directory given to Cache() and to make_temp_dir().
    remote: value given to Remote().
    policies: value given to Cache().

  Returns 1 if the .isolated tree defines no command, otherwise the return
  value of subprocess.call() for the command. The temporary directory is
  removed in every case.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # Close the file deterministically instead of leaking the handle.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          # The same content may be mapped at several paths.
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...' % len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      rmtree(outdir)
1730
1731
def main():
  """Parses the command line, sets up logging, then downloads or runs.

  Returns the value of download_test_data() when --download is used, otherwise
  the value of run_tha_test(), or 1 if run_tha_test() raised an exception.
  """
  disable_buffering()
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')

  download_group = optparse.OptionGroup(parser, 'Download')
  download_group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(download_group)

  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  source_group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  source_group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  parser.add_option_group(source_group)

  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  cache_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  cache_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  cache_group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(cache_group)

  options, args = parser.parse_args()

  # The console verbosity follows the number of -v flags, capped at DEBUG.
  console_levels = (logging.ERROR, logging.INFO, logging.DEBUG)
  level = console_levels[min(len(console_levels) - 1, options.verbose)]
  root_logger = logging.getLogger()

  logging_console = logging.StreamHandler()
  logging_console.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  logging_console.setLevel(level)
  root_logger.addHandler(logging_console)

  # The rotating log file always records everything.
  logging_rotating_file = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  logging_rotating_file.setLevel(logging.DEBUG)
  logging_rotating_file.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  root_logger.addHandler(logging_rotating_file)

  root_logger.setLevel(logging.DEBUG)

  # Usage errors are also logged so they end up in the log file.
  if bool(options.isolated) == bool(options.hash):
    logging.debug('One and only one of --isolated or --hash is required.')
    parser.error('One and only one of --isolated or --hash is required.')
  if args:
    logging.debug('Unsupported args %s' % ' '.join(args))
    parser.error('Unsupported args %s' % ' '.join(args))

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  if options.download:
    return download_test_data(options.isolated or options.hash,
                              options.download, options.remote)

  try:
    return run_tha_test(
        options.isolated or options.hash,
        options.cache,
        options.remote,
        policies)
  except Exception as e:
    # Make sure any exception is logged.
    logging.exception(e)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001833
1834
if __name__ == '__main__':
  # Ensure that we are always running with the correct encoding. This is done
  # before main() is entered; main()'s return value becomes the exit code.
  fix_default_encoding()
  sys.exit(main())