blob: b8ecc4af89e0a674b19b315eb720ffbabdb72d51 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
13import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000014import httplib
maruel@chromium.orgedd25d02013-03-26 14:38:00 +000015import inspect
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000016import json
csharp@chromium.orgbfb98742013-03-26 20:28:36 +000017import locale
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000018import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000019import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000020import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000021import optparse
22import os
23import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000024import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000025import re
26import shutil
27import stat
28import subprocess
29import sys
30import tempfile
31import threading
32import time
maruel@chromium.org97cd0be2013-03-13 14:01:36 +000033import traceback
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000034import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000035import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000036import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000037import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000038
# The 'upload' module is what AppEngineService uses for authentication. It is
# optional: if it is not there, app engine authentication support is disabled.
try:
  from third_party import upload
except ImportError:
  upload = None
else:
  # Hack out upload logging.info() by giving the module its own logger.
  upload.logging = logging.getLogger('upload')
  # Mac pylint choke on this line.
  upload.logging.setLevel(logging.WARNING)  # pylint: disable=E1103
49
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000050
# Types of action accepted by link_file().
HARDLINK = 1
SYMLINK = 2
COPY = 3

# Matches a string made of exactly 40 hexadecimal characters (a sha-1 digest).
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None

# The size of each chunk to read when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# The base directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# The name of the log to use for the run_test_cases.py command
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# The name of the key to store the count of url attempts.
COUNT_KEY = 'UrlOpenAttempt'

# The maximum number of attempts to trying opening a url before aborting.
MAX_URL_OPEN_ATTEMPTS = 30

# Global (for now) map: server URL (http://example.com) -> HttpService instance.
# Used by get_http_service to cache HttpService instances.
_http_services = {}
_http_services_lock = threading.Lock()

# Used by get_flavor(): maps sys.platform values to a flavor name.
FLAVOR_MAPPING = {
  'cygwin': 'win',
  'win32': 'win',
  'darwin': 'mac',
  'sunos5': 'solaris',
  'freebsd7': 'freebsd',
  'freebsd8': 'freebsd',
}
97
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000098
class ConfigError(ValueError):
  """Generic failure to load a .isolated file."""
102
103
class MappingError(OSError):
  """Failed to recreate the tree."""
107
108
def get_flavor():
  """Returns the system default flavor. Copied from gyp/pylib/gyp/common.py.

  The flavor is looked up in FLAVOR_MAPPING by sys.platform; any platform that
  is not listed there is reported as 'linux'.
  """
  platform_name = sys.platform
  if platform_name in FLAVOR_MAPPING:
    return FLAVOR_MAPPING[platform_name]
  return 'linux'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000112
113
def fix_default_encoding():
  """Forces utf8 solidly on all platforms.

  By default python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Returns False without changing anything when the default encoding already
  is utf-8, True once the encoding and the locale categories were updated.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Regenerate setdefaultencoding.
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  # pylint: disable=E1101
  sys.setdefaultencoding('utf-8')

  for name in dir(locale):
    if not name.startswith('LC_'):
      continue
    category = getattr(locale, name)
    # Categories that cannot be initialized or queried are left untouched.
    try:
      locale.setlocale(category, '')
    except locale.Error:
      continue
    try:
      language = locale.getlocale(category)[0]
    except (TypeError, ValueError):
      continue
    if not language:
      continue
    try:
      locale.setlocale(category, (language, 'UTF-8'))
    except locale.Error:
      # Fall back to the environment variable of the same name.
      os.environ[name] = language + '.UTF-8'

  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    pass
  return True
152
153
class Unbuffered(object):
  """Disable buffering on a file object.

  Wraps |stream| and flushes it after every write that contains a newline.
  Every attribute other than write() is forwarded to the wrapped stream.
  """

  def __init__(self, stream):
    self.stream = stream

  def __getattr__(self, attr):
    # Only reached for names not found on the wrapper itself.
    return getattr(self.stream, attr)

  def write(self, data):
    """Writes |data| and flushes when it contains at least one newline."""
    wrapped = self.stream
    wrapped.write(data)
    if '\n' not in data:
      return
    wrapped.flush()
166
167
def disable_buffering():
  """Makes this process and child processes stdout unbuffered.

  Does nothing when the PYTHONUNBUFFERED environment variable is already set
  to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...
  sys.stdout = Unbuffered(sys.stdout)
  os.environ['PYTHONUNBUFFERED'] = 'x'
175
176
def os_link(source, link_name):
  """Add support for os.link() on Windows.

  Arguments:
  - source: path of the existing file.
  - link_name: path of the hardlink to create.

  Raises OSError if the hardlink cannot be created.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return

  # CreateHardLinkW() takes the new link name first; a zero return value means
  # failure. Name both paths so the error is actionable.
  if not ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0):
    raise OSError('Failed to hardlink %s to %s' % (source, link_name))
185
186
def readable_copy(outfile, infile):
  """Makes a copy of the file that is readable by everyone.

  |infile| is copied to |outfile| with shutil.copy(), then the read bits for
  user, group and others are added to the mode of the copy.
  """
  shutil.copy(infile, outfile)
  everyone_can_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  copied_mode = os.stat(outfile).st_mode
  os.chmod(outfile, copied_mode | everyone_can_read)
193
194
def link_file(outfile, infile, action):
  """Links a file. The type of link depends on |action|.

  Arguments:
  - outfile: path to create; it must not already be a file.
  - infile: existing file to map.
  - action: one of HARDLINK, SYMLINK or COPY.

  Raises ValueError on an unknown action and MappingError when |infile| is
  missing or |outfile| already exists.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    details = (outfile, os.stat(infile).st_size, os.stat(outfile).st_size)
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' % details)

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # HARDLINK everywhere, and SYMLINK on Windows: on windows, symlink are
  # converted to hardlink and fails over to copy.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warn(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
221
222
223def _set_write_bit(path, read_only):
224 """Sets or resets the executable bit on a file or directory."""
225 mode = os.lstat(path).st_mode
226 if read_only:
227 mode = mode & 0500
228 else:
229 mode = mode | 0200
230 if hasattr(os, 'lchmod'):
231 os.lchmod(path, mode) # pylint: disable=E1101
232 else:
233 if stat.S_ISLNK(mode):
234 # Skip symlink without lchmod() support.
235 logging.debug('Can\'t change +w bit on symlink %s' % path)
236 return
237
238 # TODO(maruel): Implement proper DACL modification on Windows.
239 os.chmod(path, mode)
240
241
def make_writable(root, read_only):
  """Toggle the writable bit on a directory tree.

  Every file and directory found below |root| is passed to _set_write_bit();
  |root| itself is not modified. |root| must be an absolute path.
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    # Files first, then sub-directories, in the order reported by os.walk().
    for entry in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, entry), read_only)
251
252
def rmtree(root):
  """Wrapper around shutil.rmtree() to retry automatically on Windows.

  The tree is first made writable. On Windows the deletion is attempted up to
  three times, sleeping 2 then 4 seconds between attempts. If the last attempt
  fails too, the error is raised to the caller, as it is on other platforms,
  instead of silently leaving the tree behind.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  max_attempts = 3
  for attempt in range(max_attempts):
    try:
      shutil.rmtree(root)
      return
    except WindowsError:  # pylint: disable=E0602
      if attempt == max_attempts - 1:
        # Out of retries; do not sleep for nothing and do not hide the failure.
        raise
      delay = (attempt + 1) * 2
      print >> sys.stderr, (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      time.sleep(delay)
268
269
def is_same_filesystem(path1, path2):
  """Returns True if both paths are on the same filesystem.

  This is required to enable the use of hardlinks.

  Both paths must be absolute and must exist, since they are stat()'ed.
  """
  for path in (path1, path2):
    assert os.path.isabs(path), path

  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    if path1[0].lower() != path2[0].lower():
      return False

  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
286
287
def get_free_space(path):
  """Returns the number of free bytes.

  Uses os.statvfs() (f_bfree * f_frsize) everywhere but on Windows, where
  GetDiskFreeSpaceExW() is called instead.
  """
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
298
299
def make_temp_dir(prefix, root_dir):
  """Returns a temporary directory on the same file system as root_dir.

  The default temporary directory is used when it is on the same file system
  as |root_dir|; otherwise the directory is created next to |root_dir|, inside
  its parent directory.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
306
307
def load_isolated(content):
  """Verifies the .isolated file is valid and loads this object with the json
  data.

  Arguments:
  - content: json encoded text of the .isolated file.

  Returns the decoded dict.

  Raises ConfigError when the content is not valid json, is not a dict,
  contains an unknown key, has a value of an unexpected type, or has an 'os'
  value that differs from get_flavor().
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            # Check the type first: re.match() raises TypeError on a
            # non-string, which would escape as something else than
            # ConfigError.
            if not (isinstance(subsubvalue, basestring) and
                    RE_IS_SHA1.match(subsubvalue)):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        # Same type guard as for 'h' above.
        if not (isinstance(subvalue, basestring) and
                RE_IS_SHA1.match(subvalue)):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
385
386
def fix_python_path(cmd):
  """Returns the fixed command line to call the right python executable.

  A copy of |cmd| is returned: a leading 'python' is replaced by
  sys.executable, and sys.executable is prepended when the first item is a
  '.py' script. |cmd| itself is left untouched.
  """
  fixed = cmd[:]
  program = fixed[0]
  if program == 'python':
    fixed[0] = sys.executable
  elif program.endswith('.py'):
    fixed.insert(0, sys.executable)
  return fixed
395
396
def url_open(url, **kwargs):
  """Attempts to open the given url multiple times.

  The url is split into its server part and its request part; the request is
  then sent through the HttpService instance returned by get_http_service()
  for that server. |kwargs| are forwarded to HttpService.request().

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns a file-like object, where the response may be read from, or None
  if it was unable to connect.
  """
  host, path = split_server_request_url(url)
  return get_http_service(host).request(path, **kwargs)
412
413
def split_server_request_url(url):
  """Splits the url into scheme+netloc and path+params+query+fragment.

  Returns a (urlhost, urlpath) tuple, e.g. 'http://example.com/a?b=1' gives
  ('http://example.com', '/a?b=1').
  """
  parts = list(urlparse.urlparse(url))
  scheme, netloc = parts[0], parts[1]
  urlhost = '%s://%s' % (scheme, netloc)
  # Rebuild the url with an empty scheme and netloc to keep only the request.
  urlpath = urlparse.urlunparse(['', ''] + parts[2:])
  return urlhost, urlpath
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000420
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000421
def get_http_service(urlhost):
  """Returns existing or creates new instance of HttpService that can send
  requests to given base urlhost.

  Instances are cached in _http_services, keyed by the lower-cased urlhost
  without trailing slashes; the cache is protected by _http_services_lock.
  """
  # Ensure consistency.
  cache_key = str(urlhost).lower().rstrip('/')
  with _http_services_lock:
    cached = _http_services.get(cache_key)
    if cached:
      return cached
    created = AppEngineService(cache_key)
    _http_services[cache_key] = created
    return created
434
435
class HttpService(object):
  """Base class for a class that provides an API to HTTP based service:
    - Provides 'request' method.
    - Supports automatic request retries.
    - Supports persistent cookies.
    - Thread safe.
  """

  # File to use to store all auth cookies.
  COOKIE_FILE = os.path.join('~', '.isolated_cookies')

  # CookieJar reused by all services + lock that protects its instantiation.
  _cookie_jar = None
  _cookie_jar_lock = threading.Lock()

  def __init__(self, urlhost):
    """|urlhost| is the scheme+netloc of the server, e.g. http://example.com."""
    self.urlhost = urlhost
    self.cookie_jar = self.load_cookie_jar()
    self.opener = self.create_url_opener()

  def authenticate(self):  # pylint: disable=R0201
    """Called when HTTP server asks client to authenticate.
    Can be implemented in subclasses.

    Returns True if authentication succeeded. The base implementation never
    authenticates and returns False.
    """
    return False

  @staticmethod
  def load_cookie_jar():
    """Returns global CookieJar object that stores cookies in the file.

    The jar is created and loaded from COOKIE_FILE on the first call; later
    calls return the same instance.
    """
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        return HttpService._cookie_jar
      jar = ThreadSafeCookieJar(os.path.expanduser(HttpService.COOKIE_FILE))
      jar.load()
      HttpService._cookie_jar = jar
      return jar

  @staticmethod
  def save_cookie_jar():
    """Called when cookie jar needs to be flushed to disk."""
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        HttpService._cookie_jar.save()

  def create_url_opener(self):  # pylint: disable=R0201
    """Returns OpenerDirector that will be used when sending requests.
    Can be reimplemented in subclasses."""
    return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))

  def request(self, urlpath, data=None, content_type=None, **kwargs):
    """Attempts to open the given url multiple times.

    |urlpath| is relative to the server root, i.e. '/some/request?param=1'.

    |data| can be either:
      -None for a GET request
      -str for pre-encoded data
      -list for data to be encoded
      -dict for data to be encoded (COUNT_KEY will be added in this case)

    |content_type| can only be used when |data| is provided.

    |kwargs| are forwarded to _retry_loop() (retry_404, retry_50x).

    Returns a file-like object, where the response may be read from, or None
    if it was unable to connect.
    """
    assert urlpath and urlpath[0] == '/'

    if isinstance(data, dict) and COUNT_KEY in data:
      logging.error('%s already existed in the data passed into UlrOpen. It '
                    'would be overwritten. Aborting UrlOpen', COUNT_KEY)
      return None

    method = 'GET' if data is None else 'POST'
    assert not ((method != 'POST') and content_type), (
        'Can\'t use content_type on GET')

    # Pre-encoded or absent data cannot carry the retry counter, so it is
    # added to the query string of the url instead.
    extra_in_query = isinstance(data, str) or data is None

    def make_request(extra):
      """Returns a urllib2.Request instance for this specific retry."""
      if extra_in_query:
        payload = data
      else:
        if isinstance(data, dict):
          payload = data.items()
        else:
          payload = data[:]
        payload.extend(extra.iteritems())
        payload = urllib.urlencode(payload)
      new_url = urlparse.urljoin(self.urlhost, urlpath[1:])
      if extra_in_query:
        # In these cases, add the extra parameter to the query part of the url.
        url_parts = list(urlparse.urlparse(new_url))
        # Append the query parameter.
        if url_parts[4] and extra:
          url_parts[4] += '&'
        url_parts[4] += urllib.urlencode(extra)
        new_url = urlparse.urlunparse(url_parts)
      request = urllib2.Request(new_url, data=payload)
      if payload is not None:
        if content_type:
          request.add_header('Content-Type', content_type)
        # Header values are text; do not rely on the http layer to convert.
        request.add_header('Content-Length', str(len(payload)))
      return request

    return self._retry_loop(make_request, **kwargs)

  def _retry_loop(self, make_request, retry_404=False, retry_50x=True):
    """Runs internal request-retry loop.

    Arguments:
    - make_request: callable that takes the dict of extra parameters for the
                    current attempt and returns a urllib2.Request.
    - retry_404: if True, a 404 response is retried instead of aborting.
    - retry_50x: if False, a response with a code >= 500 aborts instead of
                 being retried.

    Returns the response object, or None once the request was aborted or
    MAX_URL_OPEN_ATTEMPTS attempts failed.
    """
    authenticated = False
    last_error = None
    for attempt in range(MAX_URL_OPEN_ATTEMPTS):
      extra = {COUNT_KEY: attempt} if attempt else {}
      request = make_request(extra)
      try:
        url_response = self._url_open(request)
        logging.debug('url_open(%s) succeeded', request.get_full_url())
        return url_response
      except urllib2.HTTPError as e:
        # Unauthorized. Ask to authenticate and then try again.
        if e.code in (401, 403):
          # Format the error a single time: formatting reads the response
          # body, which is empty on a second read.
          details = self._format_exception(e, verbose=True)
          # Try to authenticate only once. If it doesn't help, then server does
          # not support app engine authentication.
          logging.error(
              'Authentication is required for %s on attempt %d.\n%s',
              request.get_full_url(), attempt, details)
          if not authenticated and self.authenticate():
            authenticated = True
            continue
          logging.error(
              'Unable to authenticate to %s.\n%s',
              request.get_full_url(), details)
          return None

        if ((e.code < 500 and not (retry_404 and e.code == 404)) or
            (e.code >= 500 and not retry_50x)):
          # This HTTPError means we reached the server and there was a problem
          # with the request, so don't retry.
          logging.error(
              'Able to connect to %s but an exception was thrown.\n%s',
              request.get_full_url(), self._format_exception(e, verbose=True))
          return None

        # The HTTPError was due to a server error, so retry the attempt.
        logging.warning('Able to connect to %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      except (urllib2.URLError, httplib.HTTPException) as e:
        logging.warning('Unable to open url %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      # Only sleep if we are going to try again.
      if attempt != MAX_URL_OPEN_ATTEMPTS - 1:
        self._sleep_before_retry(attempt)

    logging.error('Unable to open given url, %s, after %d attempts.\n%s',
                  request.get_full_url(), MAX_URL_OPEN_ATTEMPTS,
                  self._format_exception(last_error, verbose=True))
    return None

  def _url_open(self, request):
    """Low level method to execute urllib2.Request's.
    To be mocked in tests.
    """
    return self.opener.open(request)

  def _sleep_before_retry(self, attempt):  # pylint: disable=R0201
    """Sleeps for some amount of time when retrying the request.
    To be mocked in tests.

    The delay grows with |attempt|, has a random component and is kept between
    0.1 and 20 seconds.
    """
    duration = random.random() * 3 + math.pow(1.5, (attempt + 1))
    duration = min(20, max(0.1, duration))
    time.sleep(duration)

  @staticmethod
  def _format_exception(exc, verbose=False):
    """Given an instance of some exception raised by urlopen returns human
    readable piece of text with detailed information about the error.

    With |verbose|, the headers (except the ones starting with 'x-') and the
    body of a urllib2.HTTPError are appended. This reads the body of |exc|.
    """
    out = ['Exception: %s' % (exc,)]
    if verbose:
      if isinstance(exc, urllib2.HTTPError):
        out.append('-' * 10)
        if exc.hdrs:
          for header, value in exc.hdrs.items():
            if not header.startswith('x-'):
              out.append('%s: %s' % (header.capitalize(), value))
          out.append('')
        out.append(exc.read() or '<empty body>')
        out.append('-' * 10)
    return '\n'.join(out)
627
maruel@chromium.orgef333122013-03-12 20:36:40 +0000628
class AppEngineService(HttpService):
  """HttpService with authentication support for app engine based services."""

  # This lock ensures that user won't be confused with multiple concurrent
  # login prompts.
  _auth_lock = threading.Lock()

  def __init__(self, urlhost, email=None, password=None):
    super(AppEngineService, self).__init__(urlhost)
    self.email = email
    self.password = password
    # Created on demand by get_credentials().
    self._keyring = None

  def authenticate(self):
    """Authenticates in the app engine application.

    Requires the optional 'upload' module. Cookies obtained while
    authenticating are stored in this service's cookie jar, which is then
    saved.

    Returns True on success.
    """
    if not upload:
      logging.error('\'upload\' module is missing, '
                    'app engine authentication is disabled.')
      return False

    # Captured by the nested class below, where |self| is the RPC server.
    jar = self.cookie_jar
    flush_jar = self.save_cookie_jar

    # RPC server that uses AuthenticationSupport's cookie jar.
    class AuthServer(upload.AbstractRpcServer):
      def _GetOpener(self):
        # Authentication code needs to know about 302 response.
        # So make OpenerDirector without HTTPRedirectHandler.
        handler_classes = (
            urllib2.ProxyHandler,
            urllib2.UnknownHandler,
            urllib2.HTTPHandler,
            urllib2.HTTPDefaultErrorHandler,
            urllib2.HTTPSHandler,
            urllib2.HTTPErrorProcessor,
        )
        opener = urllib2.OpenerDirector()
        for handler_class in handler_classes:
          opener.add_handler(handler_class())
        opener.add_handler(urllib2.HTTPCookieProcessor(jar))
        return opener

      def PerformAuthentication(self):
        self._Authenticate()
        flush_jar()
        return self.authenticated

    with AppEngineService._auth_lock:
      server = AuthServer(self.urlhost, self.get_credentials)
      return server.PerformAuthentication()

  def get_credentials(self):
    """Called during authentication process to get the credentials.
    May be called multiple times if authentication fails.
    Returns tuple (email, password).
    """
    # 'authenticate' calls this only if 'upload' is present.
    # Ensure other callers (if any) fail non-cryptically if 'upload' is missing.
    assert upload, '\'upload\' module is required for this to work'
    if self.email and self.password:
      return (self.email, self.password)
    if not self._keyring:
      self._keyring = upload.KeyringCreds(
          self.urlhost, self.urlhost, self.email)
    return self._keyring.GetUserCredentials()
691
692
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save."""

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists.

    If the file does not exist, an empty one is created. In both cases the
    file mode is then set to 0600. Failures are logged, never raised.
    """
    filename = filename or self.filename
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(self, filename,
                                          ignore_discard,
                                          ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError) as e:
          # Best effort: an unreadable or malformed file means no cookies.
          logging.debug('Could not load cookies from %s: %s', filename, e)
      else:
        try:
          fd = os.open(filename, os.O_CREAT, 0o600)
          os.close(fd)
        except OSError:
          logging.error('Failed to create %s', filename)
      try:
        os.chmod(filename, 0o600)
      except OSError:
        logging.error('Failed to fix mode for %s', filename)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it.

    A failure to write the file is logged, not raised.
    """
    target = filename or self.filename
    logging.debug('Saving cookies to %s', target)
    with self._cookies_lock:
      try:
        cookielib.MozillaCookieJar.save(self, filename,
                                        ignore_discard,
                                        ignore_expires)
      except (OSError, IOError):
        # On Python 2, a failure to open the file is an IOError, which is not
        # an OSError there.
        logging.error('Failed to save %s', target)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000729
730
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000731class ThreadPool(object):
732 """Implements a multithreaded worker pool oriented for mapping jobs with
733 thread-local result storage.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000734
735 Arguments:
736 - initial_threads: Number of threads to start immediately. Can be 0 if it is
737 uncertain that threads will be needed.
738 - max_threads: Maximum number of threads that will be started when all the
739 threads are busy working. Often the number of CPU cores.
740 - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
741 queue. A non-zero value may make add_task() blocking.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000742 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000743 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000744
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000745 def __init__(self, initial_threads, max_threads, queue_size):
746 logging.debug(
747 'ThreadPool(%d, %d, %d)', initial_threads, max_threads, queue_size)
748 assert initial_threads <= max_threads
749 # Update this check once 256 cores CPU are common.
750 assert max_threads <= 256
751
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000752 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000753 self._max_threads = max_threads
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000754
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000755 # Mutables.
756 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000757 self._num_of_added_tasks = 0
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000758 self._outputs_exceptions_cond = threading.Condition()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000759 self._outputs = []
760 self._exceptions = []
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000761 # Number of threads in wait state.
762 self._ready_lock = threading.Lock()
763 self._ready = 0
764 self._workers_lock = threading.Lock()
765 self._workers = []
766 for _ in range(initial_threads):
767 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000768
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000769 def _add_worker(self):
770 """Adds one worker thread if there isn't too many. Thread-safe."""
771 # Better to take the lock two times than hold it for too long.
772 with self._workers_lock:
773 if len(self._workers) >= self._max_threads:
774 return False
775 worker = threading.Thread(target=self._run)
776 with self._workers_lock:
777 if len(self._workers) >= self._max_threads:
778 return False
779 self._workers.append(worker)
780 worker.daemon = True
781 worker.start()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000782
maruel@chromium.org831958f2013-01-22 15:01:46 +0000783 def add_task(self, priority, func, *args, **kwargs):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000784 """Adds a task, a function to be executed by a worker.
785
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000786 |priority| can adjust the priority of the task versus others. Lower priority
maruel@chromium.org831958f2013-01-22 15:01:46 +0000787 takes precedence.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000788
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000789 |func| can either return a return value to be added to the output list or
790 be a generator which can emit multiple values.
791
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000792 Returns the index of the item added, e.g. the total number of enqueued items
793 up to now.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000794 """
maruel@chromium.org831958f2013-01-22 15:01:46 +0000795 assert isinstance(priority, int)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000796 assert callable(func)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000797 with self._ready_lock:
798 start_new_worker = not self._ready
799 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000800 self._num_of_added_tasks += 1
801 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000802 self.tasks.put((priority, index, func, args, kwargs))
803 if start_new_worker:
804 self._add_worker()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000805 return index
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000806
807 def _run(self):
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000808 """Worker thread loop. Runs until a None task is queued."""
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000809 while True:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000810 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000811 with self._ready_lock:
812 self._ready += 1
813 task = self.tasks.get()
814 finally:
815 with self._ready_lock:
816 self._ready -= 1
817 try:
818 if task is None:
819 # We're done.
820 return
821 _priority, _index, func, args, kwargs = task
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000822 if inspect.isgeneratorfunction(func):
823 for out in func(*args, **kwargs):
824 self._output_append(out)
825 else:
826 out = func(*args, **kwargs)
827 self._output_append(out)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000828 except Exception as e:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000829 logging.warning('Caught exception: %s', e)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000830 exc_info = sys.exc_info()
maruel@chromium.org97cd0be2013-03-13 14:01:36 +0000831 logging.info(''.join(traceback.format_tb(exc_info[2])))
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000832 self._outputs_exceptions_cond.acquire()
833 try:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000834 self._exceptions.append(exc_info)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000835 self._outputs_exceptions_cond.notifyAll()
836 finally:
837 self._outputs_exceptions_cond.release()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000838 finally:
csharp@chromium.org60991182013-03-18 13:44:17 +0000839 try:
840 self.tasks.task_done()
841 except Exception as e:
842 # We need to catch and log this error here because this is the root
843 # function for the thread, nothing higher will catch the error.
844 logging.exception('Caught exception while marking task as done: %s',
845 e)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000846
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000847 def _output_append(self, out):
848 if out is not None:
849 self._outputs_exceptions_cond.acquire()
850 try:
851 self._outputs.append(out)
852 self._outputs_exceptions_cond.notifyAll()
853 finally:
854 self._outputs_exceptions_cond.release()
855
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000856 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000857 """Extracts all the results from each threads unordered.
858
859 Call repeatedly to extract all the exceptions if desired.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000860
861 Note: will wait for all work items to be done before returning an exception.
862 To get an exception early, use get_one_result().
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000863 """
864 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000865 self.tasks.join()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000866 self._outputs_exceptions_cond.acquire()
867 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000868 if self._exceptions:
869 e = self._exceptions.pop(0)
870 raise e[0], e[1], e[2]
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000871 out = self._outputs
872 self._outputs = []
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000873 finally:
874 self._outputs_exceptions_cond.release()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000875 return out
876
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000877 def get_one_result(self):
878 """Returns the next item that was generated or raises an exception if one
879 occured.
880
881 Warning: this function will hang if there is no work item left. Use join
882 instead.
883 """
884 self._outputs_exceptions_cond.acquire()
885 try:
886 while True:
887 if self._exceptions:
888 e = self._exceptions.pop(0)
889 raise e[0], e[1], e[2]
890 if self._outputs:
891 return self._outputs.pop(0)
892 self._outputs_exceptions_cond.wait()
893 finally:
894 self._outputs_exceptions_cond.release()
895
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000896 def close(self):
897 """Closes all the threads."""
898 for _ in range(len(self._workers)):
899 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000900 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000901 for t in self._workers:
902 t.join()
903
904 def __enter__(self):
905 """Enables 'with' statement."""
906 return self
907
maruel@chromium.org97cd0be2013-03-13 14:01:36 +0000908 def __exit__(self, _exc_type, _exc_value, _traceback):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000909 """Enables 'with' statement."""
910 self.close()
911
912
def valid_file(filepath, size):
  """Returns True if |filepath| appears valid.

  Currently only the file size is compared against |size|. When |size| is
  UNKNOWN_FILE_SIZE nothing can be checked and the file is accepted without
  being stat'ed.
  """
  if size != UNKNOWN_FILE_SIZE:
    actual_size = os.stat(filepath).st_size
    if actual_size != size:
      logging.warning(
          'Found invalid item %s; %d != %d',
          os.path.basename(filepath), actual_size, size)
      return False
  return True
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000925
926
class Profiler(object):
  """Context manager that logs the time spent in the enclosed section."""

  def __init__(self, name):
    # Set when the section is entered.
    self.start_time = None
    # Label used in the log line.
    self.name = name

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
940
941
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priority of items is equals, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The low 8 bits are reserved to count retries, see
  # _task_executer().
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    """|destination_root| is either an http(s) url prefix or a directory."""
    # Function to fetch a remote object or upload to a remote location..
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = ThreadPool(self.INITIAL_WORKERS, self.MAX_WORKERS, 0)

  def join(self):
    """Blocks until the queue is empty."""
    return self._pool.join()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first.

    Thread-safe.
    """
    # The low bits are reserved for the internal retry counter.
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Enqueues the transfer of |obj| to |dest| without checking |priority|.

    Returns the index of the task in the underlying pool.
    """
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns the next result of the underlying pool, see
    ThreadPool.get_one_result().
    """
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| once it was transferred. Returns None when the transfer
    failed and was enqueued again. Raises the IOError once RETRIES retries were
    done.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Retry a few times, lowering the priority. The retry count lives in the
      # low bits of the priority.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a object to retrieve objects from a remote.

    The returned function takes (item, dest). For an http(s) url, it downloads
    and zlib-decompresses |file_or_url| + item into |dest|; every failure is
    reported as an IOError. Otherwise it copies the item from the
    |file_or_url| directory.
    """
    if re.match(r'^https?://.+$', file_or_url):
      def download_file(item, dest):
        # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
        # easy.
        # Defined before the try block so the zlib.error handler can always
        # report them.
        size = 0
        chunk = ''
        try:
          zipped_source = file_or_url + item
          logging.debug('download_file(%s)', zipped_source)

          # Because the app engine DB is only eventually consistent, retry
          # 404 errors because the file might just not be visible yet (even
          # though it has been uploaded).
          connection = url_open(zipped_source, retry_404=True)
          if not connection:
            raise IOError('Unable to open connection to %s' % zipped_source)
          decompressor = zlib.decompressobj()
          with open(dest, 'wb') as f:
            while True:
              chunk = connection.read(ZIPPED_FILE_CHUNK)
              if not chunk:
                break
              size += len(chunk)
              f.write(decompressor.decompress(chunk))
          # Ensure that all the data was properly decompressed.
          uncompressed_data = decompressor.flush()
          assert not uncompressed_data
        except IOError:
          logging.error('Encountered an exception with (%s, %s)', item, dest)
          raise
        except httplib.HTTPException as e:
          raise IOError('Encountered an HTTPException.\n%s' % e)
        except zlib.error as e:
          # Log the first bytes of the chunk that failed to see if it's
          # uncompressed data. Slicing the exception itself would only log its
          # arguments.
          logging.warning('%r', chunk[:512])
          raise IOError(
              'Problem unzipping data for item %s. Got %d bytes.\n%s' %
              (item, size, e))

      return download_file

    def copy_file(item, dest):
      source = os.path.join(file_or_url, item)
      if source == dest:
        logging.info('Source and destination are the same, no action required')
        return
      logging.debug('copy_file(%s, %s)', source, dest)
      shutil.copy(source, dest)
    return copy_file
1061
1062
class CachePolicies(object):
  """Plain holder for the cache retention limits."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
1077
1078
class NoCache(object):
  """This class is intended to be usable everywhere the Cache class is.
  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    """
    Arguments:
    - target_directory: Directory where the items are downloaded.
    - remote: Remote where to fetch items from.
    """
    self.target_directory = target_directory
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Get the request file.

    Blocks until the remote produces a result.
    """
    self.remote.add_item(priority, item, self.path(item), size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Download the first item of the given list if it is missing.

    |items| can be any non-empty iterable, as with Cache.wait_for(); iterating
    a dict yields its keys. Returns that first item.
    """
    item = next(iter(items))

    if not os.path.exists(self.path(item)):
      self.remote.add_item(Remote.MED, item, self.path(item), UNKNOWN_FILE_SIZE)
      downloaded = self.remote.get_one_result()
      assert downloaded == item

    return item

  def path(self, item):
    """Returns the path of |item| inside the target directory."""
    return os.path.join(self.target_directory, item)
1107
1108
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    self._state_need_to_be_saved = False
    # A lookup map to speed up searching.
    self._lookup = {}
    self._lookup_is_stale = True

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    with Profiler('Setup'):
      if not os.path.isdir(self.cache_dir):
        os.makedirs(self.cache_dir)
      previous = self._load_state()
      self._reconcile_with_directory(previous)
      self.trim()

  def _load_state(self):
    """Loads self.state from the state file, discarding it if it is invalid.

    Returns the set of filenames listed in the resulting state. It is empty
    when there is no usable state.
    """
    if os.path.isfile(self.state_file):
      try:
        with open(self.state_file, 'r') as f:
          self.state = json.load(f)
      except (IOError, ValueError) as e:
        # Too bad. The file will be overwritten and the cache cleared.
        logging.error(
            'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
        self._state_need_to_be_saved = True
      if (not isinstance(self.state, list) or
          not all(
            isinstance(i, (list, tuple)) and len(i) == 2
            for i in self.state)):
        # Discard.
        self._state_need_to_be_saved = True
        self.state = []

    previous = set(filename for filename, _ in self.state)
    if len(previous) != len(self.state):
      logging.warning('Cache state is corrupted, found duplicate files')
      self._state_need_to_be_saved = True
      self.state = []
      # The state was dropped, so no file is tracked anymore. Otherwise the
      # files on disk would be skipped instead of being added back.
      previous = set()
    return previous

  def _reconcile_with_directory(self, previous):
    """Ensures that all files listed in the state still exist and adds new
    ones.

    |previous| is the set of filenames listed in self.state. It is modified.
    """
    added = 0
    for filename in os.listdir(self.cache_dir):
      if filename == self.STATE_FILE:
        continue
      if filename in previous:
        previous.remove(filename)
        continue
      # An untracked file.
      if not RE_IS_SHA1.match(filename):
        logging.warning('Removing unknown file %s from cache', filename)
        os.remove(self.path(filename))
        continue
      # Insert as the oldest file. It will be deleted eventually if not
      # accessed.
      self._add(filename, False)
      logging.warning('Add unknown file %s to cache', filename)
      added += 1

    if added:
      logging.warning('Added back %d unknown files', added)
    if previous:
      # What is left in |previous| was listed in the state but not found.
      logging.warning('Removed %d lost files', len(previous))
      # Set explicitly in case self._add() wasn't called.
      self._state_need_to_be_saved = True
      # Filter out entries that were not found while keeping the previous
      # order.
      self.state = [
        (filename, size) for filename, size in self.state
        if filename not in previous
      ]

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Trims the cache and logs statistics about this session."""
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%5d (%8dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('       %8dkb free', self._free_disk / 1024)

  def remove_file_at_index(self, index):
    """Removes the file at the given index.

    A failure to delete the file itself is logged, not raised.
    """
    try:
      self._state_need_to_be_saved = True
      filename, size = self.state.pop(index)
      # If the lookup was already stale, its possible the filename was not
      # present yet.
      self._lookup_is_stale = True
      self._lookup.pop(filename, None)
      self._removed.append(size)
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)

  def remove_lru_file(self):
    """Removes the last recently used file."""
    self.remove_file_at_index(0)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    # Ensure maximum cache size.
    if self.policies.max_cache_size and self.state:
      # Keep a running total instead of summing the state for each removal.
      total = sum(i[1] for i in self.state)
      while total > self.policies.max_cache_size:
        total -= self.state[0][1]
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)

    self.save()

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verifiy that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.
    """
    assert '/' not in item
    path = self.path(item)
    self._update_lookup()
    index = self._lookup.get(item)

    if index is not None:
      if not valid_file(path, size):
        self.remove_file_at_index(index)
        index = None
      else:
        assert index < len(self.state)
        # Was already in cache. Update it's LRU value by putting it at the end.
        self._state_need_to_be_saved = True
        self._lookup_is_stale = True
        self.state.append(self.state.pop(index))

    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.add_item(priority, item, path, size)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    self._update_lookup()
    if obj not in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering."""
    if self._state_need_to_be_saved:
      # Close the file explicitly so the state is fully written on return.
      with open(self.state_file, 'wb') as f:
        json.dump(self.state, f, separators=(',',':'))
      self._state_need_to_be_saved = False

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    self._update_lookup()
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_one_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    self._state_need_to_be_saved = True
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self._lookup_is_stale = True
      self.state.insert(0, (item, size))

  def _update_lookup(self):
    """Rebuilds the filename -> index map if it was marked stale."""
    if self._lookup_is_stale:
      self._lookup = dict(
          (filename, index) for index, (filename, _) in enumerate(self.state))
      self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001352
1353
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False
    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.

    One child IsolatedFile is created per entry of the 'includes' key. Must
    be called at most once per instance.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included_hashes = self.data.get('includes', [])
    self.children = [IsolatedFile(h) for h in included_hashes]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s' % filepath)
      cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1408
1409
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    # Command to run; kept from the first processed node that defines one.
    self.command = []
    # Maps each file path to its properties dictionary.
    self.files = {}
    # None until a node defines 'read_only'; load() normalizes it to a
    # non-None value before returning.
    self.read_only = None
    # None until a node defines 'relative_cwd'; load() normalizes it to a
    # string before returning.
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Raises ConfigError when a .isolated file is included more than once in the
    tree, which covers recursive includes.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Every hash ever requested, used to refuse recursive includes. It must be
    # updated for each new include; otherwise only a direct reference to the
    # root would be detected and any other cycle would be requested forever.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node defining a property wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file deterministically instead of relying on the garbage
      # collector.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    # Sanity check: every node of the tree must have been processed.
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1491
1492
def create_directories(base_directory, files):
  """Creates under |base_directory| every directory needed by |files|.

  |files| is a sized iterable of relative file paths. Each parent directory is
  created with os.mkdir(), parents before children.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect the directory of each file along with all of its ancestors.
  needed = set()
  for path in files:
    parent = os.path.dirname(path)
    while parent:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # Sorting guarantees a directory is created before anything it contains.
  for relpath in sorted(needed):
    os.mkdir(os.path.join(base_directory, relpath))
1505
1506
def create_links(base_directory, files):
  """Creates the symlinks described by |files| under |base_directory|.

  |files| is an iterable of (relative path, properties) pairs. Only the entries
  with a 'l' property, the link destination, are considered. On Windows these
  entries are skipped with a warning.
  """
  on_windows = sys.platform == 'win32'
  for filepath, properties in files:
    if 'l' not in properties:
      continue
    if on_windows:
      # TODO(maruel): Create junctions or empty text files similar to what
      # cygwin do?
      logging.warning('Ignoring symlink %s', filepath)
      continue
    outfile = os.path.join(base_directory, filepath)
    # os.symlink is not available on Windows, hence the pylint exemption. A
    # .isolated file generated for Windows should never carry a 'link'
    # property.
    os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    if 'm' not in properties:
      continue
    # os.lchmod only exists on some platforms; the mode is left untouched
    # elsewhere.
    lchmod = getattr(os, 'lchmod', None)
    if lchmod:
      lchmod(outfile, properties['m'])
1525
1526
def setup_commands(base_directory, cwd, cmd):
  """Prepares the working directory and the command used to run the test.

  |cwd| must be relative; it is resolved against |base_directory| and created
  when missing. |cmd| is modified in place: the forward slashes of its first
  item are converted to the native separator.

  Returns a (working directory, command) tuple.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  work_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(work_dir):
    os.makedirs(work_dir)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0]
  cmd[0] = executable.replace('/', os.path.sep)
  return work_dir, fix_python_path(cmd)
1541
1542
def generate_remaining_files(files):
  """Groups the files that still have to be downloaded by content hash.

  |files| is an iterable of (file path, properties) pairs. Entries without a
  'h' property are ignored.

  Returns a dict mapping each hash to the list of (file path, properties)
  tuples sharing it, in iteration order.
  """
  by_hash = {}
  for name, meta in files:
    if 'h' not in meta:
      continue
    digest = meta['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((name, meta))
  return by_hash
1551
1552
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies to the given directory.

  Loads the .isolated file |isolated_hash| and everything it includes, creates
  the directories and symlinks under |target_directory|, moves or copies each
  downloaded object to its final path, then prints the command to run. The
  test itself is not executed.

  Returns 1 when the loaded settings define no command, 0 otherwise.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  # NOTE(review): NoCache presumably stores the fetched objects directly inside
  # target_directory; confirm against its definition.
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds') as _prof:
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    print >> sys.stderr, 'No command to run'
    return 1

  with Profiler('GetRest') as _prof:
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    # A copy of the command is passed since setup_commands() modifies its
    # argument in place.
    cwd, cmd = setup_commands(target_directory, settings.relative_cwd,
                              settings.command[:])

    # Maps each content hash to the list of (filepath, properties) using it.
    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      obj = no_cache.wait_for(remaining)
      files = remaining.pop(obj)

      for i, (filepath, properties) in enumerate(files):
        outfile = os.path.join(target_directory, filepath)
        logging.info(no_cache.path(obj))

        # The same object can be mapped at several paths: copy it for every
        # destination but the last one, which gets the downloaded file itself.
        if i + 1 == len(files):
          os.rename(no_cache.path(obj), outfile)
        else:
          shutil.copyfile(no_cache.path(obj), outfile)

        if 'm' in properties and not sys.platform == 'win32':
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in that
          # case.
          os.chmod(outfile, properties['m'])

      # Throttle the progress messages.
      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...' % len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1610
1611
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  |isolated_hash| is either a value matching RE_IS_SHA1 or the path of a local
  .isolated file, which is then added to the cache under its sha-1.

  Returns 1 when the loaded settings define no command, otherwise the value
  returned by subprocess.call(). An OSError raised while starting the command
  is reported on stderr and re-raised. The temporary directory is removed in
  all cases.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          h = hashlib.sha1(open(isolated_hash, 'rb').read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        # Maps each content hash to the list of (filepath, properties) using
        # it.
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        # A copy of the command is passed since setup_commands() modifies its
        # argument in place.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          # Throttle the progress messages.
          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...' % len(remaining))
            last_update = time.time()

      if settings.read_only:
        # NOTE(review): presumably the second argument asks for the tree to be
        # made read-only; confirm against make_writable().
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      # Runs on every exit path, including the early returns above.
      rmtree(outdir)
1676
1677
def main():
  """Parses the command line, sets up logging and runs the requested action.

  With --download the dependencies are only downloaded to the given
  directory; otherwise the test is run from a temporary directory.

  Returns the value returned by download_test_data() or run_tha_test(), or 1
  when run_tha_test() raises an exception.
  """
  disable_buffering()
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')

  group = optparse.OptionGroup(parser, 'Download')
  group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  parser.add_option_group(group)

  # Note: --remote is registered on the 'Data source' group after that group
  # was attached to the parser.
  group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(group)

  options, args = parser.parse_args()
  # No -v maps to ERROR, one to INFO, two or more to DEBUG.
  level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]

  # The console handler honors the verbosity requested on the command line.
  logging_console = logging.StreamHandler()
  logging_console.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  logging_console.setLevel(level)
  logging.getLogger().addHandler(logging_console)

  # The rotating log file always records everything down to DEBUG.
  logging_rotating_file = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  logging_rotating_file.setLevel(logging.DEBUG)
  logging_rotating_file.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  logging.getLogger().addHandler(logging_rotating_file)

  # The root logger lets everything through; each handler filters on its own
  # level.
  logging.getLogger().setLevel(logging.DEBUG)

  # Usage errors are also logged so they reach the log file before
  # parser.error() is called.
  if bool(options.isolated) == bool(options.hash):
    logging.debug('One and only one of --isolated or --hash is required.')
    parser.error('One and only one of --isolated or --hash is required.')
  if args:
    logging.debug('Unsupported args %s' % ' '.join(args))
    parser.error('Unsupported args %s' % ' '.join(args))

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  if options.download:
    return download_test_data(options.isolated or options.hash,
                              options.download, options.remote)
  else:
    try:
      return run_tha_test(
          options.isolated or options.hash,
          options.cache,
          options.remote,
          policies)
    except Exception, e:
      # Make sure any exception is logged.
      logging.exception(e)
      return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001779
1780
if __name__ == '__main__':
  # The default encoding has to be corrected before anything else runs.
  fix_default_encoding()
  exit_code = main()
  sys.exit(exit_code)