blob: 3705fe21166e69b16323db0387c7c5281dee03ea [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
13import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000014import httplib
maruel@chromium.orgedd25d02013-03-26 14:38:00 +000015import inspect
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000016import json
csharp@chromium.orgbfb98742013-03-26 20:28:36 +000017import locale
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000018import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000019import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000020import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000021import optparse
22import os
23import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000024import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000025import re
26import shutil
27import stat
28import subprocess
29import sys
30import tempfile
31import threading
32import time
maruel@chromium.org97cd0be2013-03-13 14:01:36 +000033import traceback
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000034import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000035import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000036import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000037import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000038
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000039# Try to import 'upload' module used by AppEngineService for authentication.
40# If it is not there, app engine authentication support will be disabled.
41try:
42 from third_party import upload
43 # Hack out upload logging.info()
44 upload.logging = logging.getLogger('upload')
45 # Mac pylint choke on this line.
46 upload.logging.setLevel(logging.WARNING) # pylint: disable=E1103
47except ImportError:
48 upload = None
49
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000050
# Types of action accepted by link_file().
HARDLINK, SYMLINK, COPY = 1, 2, 3

# Matches a string made of exactly 40 hexadecimal characters, i.e. a
# hex-encoded SHA-1 digest.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None

# The size of each chunk to read when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# The base directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# The name of the log to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# The name of the key to store the count of url attempts.
COUNT_KEY = 'UrlOpenAttempt'

# The maximum number of attempts to open a url before aborting.
MAX_URL_OPEN_ATTEMPTS = 30

# Global (for now) map: server URL (http://example.com) -> HttpService instance.
# Used by get_http_service to cache HttpService instances.
_http_services = {}
_http_services_lock = threading.Lock()

# Used by get_flavor(): maps a sys.platform value to a flavor name.
FLAVOR_MAPPING = {
  'cygwin': 'win',
  'win32': 'win',
  'darwin': 'mac',
  'sunos5': 'solaris',
  'freebsd7': 'freebsd',
  'freebsd8': 'freebsd',
}
97
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000098
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be loaded or fails validation."""
102
103
class MappingError(OSError):
  """Raised when the file tree cannot be recreated."""
107
108
def get_flavor():
  """Returns the flavor name for the current platform.

  Looks sys.platform up in FLAVOR_MAPPING and falls back to 'linux' for any
  platform that is not listed. Copied from gyp/pylib/gyp/common.py.
  """
  platform = sys.platform
  if platform in FLAVOR_MAPPING:
    return FLAVOR_MAPPING[platform]
  return 'linux'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000112
113
def fix_default_encoding():
  """Switches the interpreter default encoding and the locale to utf-8.

  By default python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Returns False if the default encoding was already utf-8 (nothing is touched
  in that case), True once the encoding and locale categories were updated.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Regenerate setdefaultencoding.
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  # pylint: disable=E1101
  sys.setdefaultencoding('utf-8')

  category_names = [name for name in dir(locale) if name[0:3] == 'LC_']
  for name in category_names:
    category = getattr(locale, name)
    try:
      locale.setlocale(category, '')
    except locale.Error:
      continue
    try:
      language = locale.getlocale(category)[0]
    except (TypeError, ValueError):
      continue
    if not language:
      continue
    try:
      locale.setlocale(category, (language, 'UTF-8'))
    except locale.Error:
      # The category could not be switched in-process; record the wanted
      # value in the environment instead.
      os.environ[name] = language + '.UTF-8'

  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    pass
  return True
152
153
class Unbuffered(object):
  """Wraps a file object so that it is flushed after each written line.

  write() forwards the data to the wrapped stream and flushes it whenever the
  data contains a newline. Every other attribute is forwarded to the wrapped
  stream unchanged.
  """
  def __init__(self, stream):
    self.stream = stream

  def write(self, data):
    target = self.stream
    target.write(data)
    if '\n' not in data:
      return
    target.flush()

  def __getattr__(self, attr):
    # Only reached for names not found on the wrapper itself.
    return getattr(self.stream, attr)
166
167
def disable_buffering():
  """Makes this process and child processes stdout unbuffered.

  Does nothing when the PYTHONUNBUFFERED environment variable is already set
  to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...
  sys.stdout = Unbuffered(sys.stdout)
  os.environ['PYTHONUNBUFFERED'] = 'x'
175
176
def os_link(source, link_name):
  """Creates a hardlink named |link_name| pointing to |source|.

  Uses os.link() everywhere except on Windows, where CreateHardLinkW is called
  through ctypes. Raises OSError when the Windows call reports failure.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  created = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not created:
    raise OSError()
185
186
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and adds read permission for everyone.

  The user, group and other read bits are OR-ed into whatever mode the copy
  ended up with.
  """
  shutil.copy(infile, outfile)
  everyone_can_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  copied_mode = os.stat(outfile).st_mode
  os.chmod(outfile, copied_mode | everyone_can_read)
193
194
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile| using the method selected by |action|.

  |action| must be one of HARDLINK, SYMLINK or COPY, otherwise ValueError is
  raised. Raises MappingError if |infile| is not an existing file or if
  |outfile| already exists as a file.

  A failed hardlink falls back to a readable copy.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, os.stat(infile).st_size, os.stat(outfile).st_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # HARDLINK, or SYMLINK on Windows: on windows, symlink are converted to
  # hardlink and fails over to copy.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warning(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
221
222
def _set_write_bit(path, read_only):
  """Sets or resets the write bit on a file or directory.

  Arguments:
  - path: file, directory or symlink to modify. The link itself is inspected
          (os.lstat), not its target.
  - read_only: if True, the mode is reduced to the owner read and execute bits
               (mode & 0500); otherwise the owner write bit (0200) is added.

  Symlinks are modified through os.lchmod() when it is available and are left
  untouched otherwise.
  """
  original_mode = os.lstat(path).st_mode
  if read_only:
    mode = original_mode & 0o500
  else:
    mode = original_mode | 0o200
  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  # The symlink test must use the mode as returned by lstat(): masking with
  # 0500 drops the file type bits, so testing the new mode would never detect
  # a symlink and os.chmod() below would then modify the link's target.
  if stat.S_ISLNK(original_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s' % path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
240
241
def make_writable(root, read_only):
  """Toggles the write bit on everything found below |root|.

  |root| must be an absolute path. Each directory level is processed files
  first, then sub-directories. |root| itself is not modified.
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    for entry in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, entry), read_only)
251
252
def rmtree(root):
  """Deletes the |root| tree, retrying automatically on Windows.

  The tree is made writable first. On Windows the deletion is attempted up to
  three times, sleeping 2, 4 then 6 seconds after each failure; nothing is
  raised if the last attempt fails too.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in (1, 2, 3):
    try:
      shutil.rmtree(root)
    except WindowsError:  # pylint: disable=E0602
      delay = attempt * 2
      print >> sys.stderr, (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      time.sleep(delay)
    else:
      break
268
269
def is_same_filesystem(path1, path2):
  """Tells whether two absolute paths live on the same filesystem.

  This is required to enable the use of hardlinks.

  Both paths must be absolute. The device ids reported by os.stat() are
  compared; on Windows a drive letter mismatch short-circuits to False.
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    assert re.match(r'^[a-zA-Z]\:\\.*', path1), path1
    assert re.match(r'^[a-zA-Z]\:\\.*', path2), path2
    drive1 = path1[0].lower()
    drive2 = path2[0].lower()
    if drive1 != drive2:
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
286
287
def get_free_space(path):
  """Returns the number of free bytes on the filesystem holding |path|.

  Uses GetDiskFreeSpaceExW on Windows and os.statvfs() (free blocks times
  fragment size) everywhere else.
  """
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  total_free = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(total_free))
  return total_free.value
298
299
def make_temp_dir(prefix, root_dir):
  """Creates a temporary directory on the same file system as |root_dir|.

  The default temporary directory is used when it shares a filesystem with
  |root_dir|; otherwise the directory is created next to |root_dir|, i.e. in
  its parent directory. Returns the path of the new directory.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent = None
  else:
    parent = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent)
306
307
def load_isolated(content):
  """Parses the content of a .isolated file and verifies it is valid.

  Arguments:
  - content: string holding the json encoded .isolated data.

  Returns the decoded dict once every key in it has been validated.

  Raises ConfigError if |content| is not valid json, does not decode to a
  dict, contains an unknown key, contains a value of an unexpected type or
  declares an 'os' that differs from get_flavor().
  """
  def is_sha1(candidate):
    # The type must be checked first: RE_IS_SHA1.match() raises TypeError on
    # anything that is not a string, which would escape as something other
    # than ConfigError.
    return bool(
        isinstance(candidate, basestring) and RE_IS_SHA1.match(candidate))

  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        # Per-file properties: 'l' (link), 'm' (int), 'h' (sha-1), 's' (int).
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_sha1(subsubvalue):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_sha1(subvalue):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
385
386
def fix_python_path(cmd):
  """Returns a copy of |cmd| that calls the right python executable.

  A leading 'python' is replaced by sys.executable, and sys.executable is
  prepended when the first item ends with '.py'. |cmd| itself is not modified.
  """
  fixed = cmd[:]
  program = fixed[0]
  if program == 'python':
    fixed[0] = sys.executable
  elif program.endswith('.py'):
    fixed.insert(0, sys.executable)
  return fixed
395
396
def url_open(url, data=None, retry_404=False, content_type=None):
  """Attempts to open the given url multiple times.

  The url is split into its server part (scheme://host) and the remainder;
  the request is then sent through the HttpService returned by
  get_http_service() for that server.

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns a file-like object, where the response may be read from, or None
  if it was unable to connect.
  """
  parsed = list(urlparse.urlparse(url))
  scheme, netloc = parsed[0], parsed[1]
  server_url = '%s://%s' % (scheme, netloc)
  # Everything after the network location: path, params, query and fragment.
  request_url = urlparse.urlunparse(['', ''] + parsed[2:])
  return get_http_service(server_url).request(
      request_url, data, retry_404, content_type)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000414
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000415
def get_http_service(url):
  """Returns the HttpService instance used to send requests to |url|.

  Instances are cached per url in _http_services; a new AppEngineService is
  created and cached on first use. The cache is accessed under
  _http_services_lock.
  """
  with _http_services_lock:
    cached = _http_services.get(url)
    if cached:
      return cached
    created = AppEngineService(url)
    _http_services[url] = created
    return created
426
427
class HttpService(object):
  """Base class for a class that provides an API to HTTP based service:
    - Provides 'request' method.
    - Supports automatic request retries.
    - Supports persistent cookies.
    - Thread safe.
  """

  # File to use to store all auth cookies.
  COOKIE_FILE = os.path.join('~', '.isolated_cookies')

  # CookieJar reused by all services + lock that protects its instantiation.
  _cookie_jar = None
  _cookie_jar_lock = threading.Lock()

  def __init__(self, url):
    """Binds the service to the server at |url| (trailing '/' is dropped)."""
    self.url = str(url.rstrip('/'))
    self.cookie_jar = self.load_cookie_jar()
    self.opener = self.create_url_opener()

  def authenticate(self):  # pylint: disable=R0201
    """Called when HTTP server asks client to authenticate.
    Can be implemented in subclasses.

    Returns True if authentication succeeded. This base implementation always
    returns False.
    """
    return False

  @staticmethod
  def load_cookie_jar():
    """Returns global CookieJar object that stores cookies in the file.

    The jar is created and loaded from COOKIE_FILE (with '~' expanded) on the
    first call and reused by every later call.
    """
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        return HttpService._cookie_jar
      jar = ThreadSafeCookieJar(os.path.expanduser(HttpService.COOKIE_FILE))
      jar.load()
      HttpService._cookie_jar = jar
      return jar

  @staticmethod
  def save_cookie_jar():
    """Called when cookie jar needs to be flushed to disk.

    Does nothing if the global cookie jar was never created.
    """
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        HttpService._cookie_jar.save()

  def create_url_opener(self):  # pylint: disable=R0201
    """Returns OpenerDirector that will be used when sending requests.
    Can be reimplemented in subclasses."""
    return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))

  def request(self, url, data=None, retry_404=False, content_type=None):
    """Attempts to open the given url multiple times.

    |url| is relative to the server root, i.e. '/some/request?param=1'.

    |data| can be either:
      -None for a GET request
      -str for pre-encoded data
      -list for data to be encoded
      -dict for data to be encoded (COUNT_KEY will be added in this case)

    |retry_404| makes a 404 response retried like a server error.

    |content_type| is sent as the Content-Type header; only valid when |data|
    is not None.

    Returns a file-like object, where the response may be read from, or None
    if it was unable to connect.
    """
    assert url and url[0] == '/'

    if isinstance(data, dict) and COUNT_KEY in data:
      logging.error('%s already existed in the data passed into UlrOpen. It '
                    'would be overwritten. Aborting UrlOpen', COUNT_KEY)
      return None

    method = 'GET' if data is None else 'POST'
    assert not ((method != 'POST') and content_type), (
        'Can\'t use content_type on GET')

    def make_request(extra):
      """Returns a urllib2.Request instance for this specific retry.

      |extra| holds the additional parameters for this attempt. They are
      merged into the encoded body when |data| is a list or a dict, and
      appended to the query string when |data| is a str or None.
      """
      if isinstance(data, str) or data is None:
        payload = data
      else:
        if isinstance(data, dict):
          payload = data.items()
        else:
          payload = data[:]
        payload.extend(extra.iteritems())
        payload = urllib.urlencode(payload)
      new_url = urlparse.urljoin(self.url, url.lstrip('/'))
      if isinstance(data, str) or data is None:
        # In these cases, add the extra parameter to the query part of the url.
        url_parts = list(urlparse.urlparse(new_url))
        # Append the query parameter.
        if url_parts[4] and extra:
          url_parts[4] += '&'
        url_parts[4] += urllib.urlencode(extra)
        new_url = urlparse.urlunparse(url_parts)
      request = urllib2.Request(new_url, data=payload)
      if payload is not None:
        if content_type:
          request.add_header('Content-Type', content_type)
        request.add_header('Content-Length', len(payload))
      return request

    return self._retry_loop(make_request, retry_404)

  def _retry_loop(self, make_request, retry_404=False):
    """Runs internal request-retry loop.

    |make_request| is called with the extra parameters of each attempt and
    must return the request to send. At most MAX_URL_OPEN_ATTEMPTS attempts
    are made.

    Returns the response on success, or None when the server rejected the
    request, authentication failed or every attempt failed.
    """
    authenticated = False
    last_error = None
    for attempt in range(MAX_URL_OPEN_ATTEMPTS):
      # The first attempt carries no counter; retries report their index.
      extra = {COUNT_KEY: attempt} if attempt else {}
      request = make_request(extra)
      try:
        url_response = self._url_open(request)
        logging.debug('url_open(%s) succeeded', request.get_full_url())
        return url_response
      except urllib2.HTTPError as e:
        # Unauthorized. Ask to authenticate and then try again.
        if e.code in (401, 403):
          # Format the error a single time: the verbose form reads the
          # response body, so formatting it again would find it already
          # consumed and report an empty body.
          details = self._format_exception(e, verbose=True)
          # Try to authenticate only once. If it doesn't help, then server does
          # not support app engine authentication.
          logging.warning(
              'Authentication is required for %s on attempt %d.\n%s',
              request.get_full_url(), attempt, details)
          if not authenticated and self.authenticate():
            authenticated = True
            continue
          logging.error(
              'Unable to authenticate to %s.\n%s',
              request.get_full_url(), details)
          return None

        if e.code < 500 and not (retry_404 and e.code == 404):
          # This HTTPError means we reached the server and there was a problem
          # with the request, so don't retry.
          logging.error(
              'Able to connect to %s but an exception was thrown.\n%s',
              request.get_full_url(), self._format_exception(e, verbose=True))
          return None

        # The HTTPError was due to a server error, so retry the attempt.
        logging.warning('Able to connect to %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      except (urllib2.URLError, httplib.HTTPException) as e:
        logging.warning('Unable to open url %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      # Only sleep if we are going to try again.
      if attempt != MAX_URL_OPEN_ATTEMPTS - 1:
        self._sleep_before_retry(attempt)

    logging.error('Unable to open given url, %s, after %d attempts.\n%s',
                  request.get_full_url(), MAX_URL_OPEN_ATTEMPTS,
                  self._format_exception(last_error, verbose=True))
    return None

  def _url_open(self, request):
    """Low level method to execute urllib2.Request's.
    To be mocked in tests.
    """
    return self.opener.open(request)

  def _sleep_before_retry(self, attempt):  # pylint: disable=R0201
    """Sleeps for some amount of time when retrying the request.
    To be mocked in tests.

    The delay is a random value in [0, 3) plus 1.5 ** (attempt + 1) seconds,
    clamped to the [0.1, 20] range.
    """
    duration = random.random() * 3 + math.pow(1.5, (attempt + 1))
    duration = min(20, max(0.1, duration))
    time.sleep(duration)

  @staticmethod
  def _format_exception(exc, verbose=False):
    """Given an instance of some exception raised by urlopen returns human
    readable piece of text with detailed information about the error.

    With |verbose| set and |exc| being a urllib2.HTTPError, the response
    headers (except the 'x-' ones) and the response body are included. This
    reads the body from |exc|.
    """
    out = ['Exception: %s' % (exc,)]
    if verbose:
      if isinstance(exc, urllib2.HTTPError):
        out.append('-' * 10)
        if exc.hdrs:
          for header, value in exc.hdrs.items():
            if not header.startswith('x-'):
              out.append('%s: %s' % (header.capitalize(), value))
          out.append('')
        out.append(exc.read() or '<empty body>')
        out.append('-' * 10)
    return '\n'.join(out)
618
maruel@chromium.orgef333122013-03-12 20:36:40 +0000619
class AppEngineService(HttpService):
  """HttpService that knows how to authenticate against an app engine based
  service, using the optional 'upload' module.
  """

  # This lock ensures that user won't be confused with multiple concurrent
  # login prompts.
  _auth_lock = threading.Lock()

  def __init__(self, url, email=None, password=None):
    super(AppEngineService, self).__init__(url)
    self.email = email
    self.password = password
    # Created on first use by get_credentials().
    self._keyring = None

  def authenticate(self):
    """Authenticates in the app engine application.
    Returns True on success. Returns False right away when the 'upload'
    module is not available.
    """
    if not upload:
      logging.warning('\'upload\' module is missing, '
                      'app engine authentication is disabled.')
      return False

    # Captured as locals so the nested class below can reach them.
    jar = self.cookie_jar
    flush_cookies = self.save_cookie_jar

    # RPC server that uses AuthenticationSupport's cookie jar.
    class AuthServer(upload.AbstractRpcServer):
      def _GetOpener(self):
        # Authentication code needs to know about 302 response.
        # So make OpenerDirector without HTTPRedirectHandler.
        handlers = (
            urllib2.ProxyHandler(),
            urllib2.UnknownHandler(),
            urllib2.HTTPHandler(),
            urllib2.HTTPDefaultErrorHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPErrorProcessor(),
            urllib2.HTTPCookieProcessor(jar),
        )
        opener = urllib2.OpenerDirector()
        for handler in handlers:
          opener.add_handler(handler)
        return opener

      def PerformAuthentication(self):
        self._Authenticate()
        flush_cookies()
        return self.authenticated

    with AppEngineService._auth_lock:
      return AuthServer(self.url, self.get_credentials).PerformAuthentication()

  def get_credentials(self):
    """Called during authentication process to get the credentials.
    May be called multiple times if authentication fails.
    Returns tuple (email, password).

    The email and password given to the constructor are used when both are
    set; otherwise the credentials come from upload.KeyringCreds.
    """
    # 'authenticate' calls this only if 'upload' is present.
    # Ensure other callers (if any) fail non-cryptically if 'upload' is missing.
    assert upload, '\'upload\' module is required for this to work'
    if self.email and self.password:
      return (self.email, self.password)
    if not self._keyring:
      self._keyring = upload.KeyringCreds(
          self.url, self.url.lower(), self.email)
    return self._keyring.GetUserCredentials()
682
683
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save."""

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists.

    |filename| defaults to the file name the jar was created with. A file
    that cannot be read or parsed is silently ignored. A missing file is
    created empty. In both cases the file mode is then set to 0600; failures
    to create the file or to change its mode are logged, not raised.
    """
    filename = filename or self.filename
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(self, filename,
                                          ignore_discard,
                                          ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError):
          pass
      else:
        try:
          fd = os.open(filename, os.O_CREAT, 0o600)
          os.close(fd)
        except OSError:
          logging.error('Failed to create %s', filename)
      try:
        os.chmod(filename, 0o600)
      except OSError:
        logging.error('Failed to fix mode for %s', filename)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it.

    |filename| defaults to the file name the jar was created with. A failure
    to write the file is logged, not raised.
    """
    # Resolved only for logging, so that messages name the real file even when
    # the default file name is used.
    target = filename or self.filename
    logging.debug('Saving cookies to %s', target)
    with self._cookies_lock:
      try:
        cookielib.MozillaCookieJar.save(self, filename,
                                        ignore_discard,
                                        ignore_expires)
      except (IOError, OSError):
        # Both are listed: a file that cannot be opened is reported as
        # IOError, which is not an OSError subclass on Python 2.
        logging.error('Failed to save %s', target)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000720
721
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000722class ThreadPool(object):
723 """Implements a multithreaded worker pool oriented for mapping jobs with
724 thread-local result storage.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000725
726 Arguments:
727 - initial_threads: Number of threads to start immediately. Can be 0 if it is
728 uncertain that threads will be needed.
729 - max_threads: Maximum number of threads that will be started when all the
730 threads are busy working. Often the number of CPU cores.
731 - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
732 queue. A non-zero value may make add_task() blocking.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000733 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000734 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000735
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000736 def __init__(self, initial_threads, max_threads, queue_size):
737 logging.debug(
738 'ThreadPool(%d, %d, %d)', initial_threads, max_threads, queue_size)
739 assert initial_threads <= max_threads
740 # Update this check once 256 cores CPU are common.
741 assert max_threads <= 256
742
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000743 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000744 self._max_threads = max_threads
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000745
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000746 # Mutables.
747 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000748 self._num_of_added_tasks = 0
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000749 self._outputs_exceptions_cond = threading.Condition()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000750 self._outputs = []
751 self._exceptions = []
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000752 # Number of threads in wait state.
753 self._ready_lock = threading.Lock()
754 self._ready = 0
755 self._workers_lock = threading.Lock()
756 self._workers = []
757 for _ in range(initial_threads):
758 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000759
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000760 def _add_worker(self):
761 """Adds one worker thread if there isn't too many. Thread-safe."""
762 # Better to take the lock two times than hold it for too long.
763 with self._workers_lock:
764 if len(self._workers) >= self._max_threads:
765 return False
766 worker = threading.Thread(target=self._run)
767 with self._workers_lock:
768 if len(self._workers) >= self._max_threads:
769 return False
770 self._workers.append(worker)
771 worker.daemon = True
772 worker.start()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000773
maruel@chromium.org831958f2013-01-22 15:01:46 +0000774 def add_task(self, priority, func, *args, **kwargs):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000775 """Adds a task, a function to be executed by a worker.
776
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000777 |priority| can adjust the priority of the task versus others. Lower priority
maruel@chromium.org831958f2013-01-22 15:01:46 +0000778 takes precedence.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000779
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000780 |func| can either return a return value to be added to the output list or
781 be a generator which can emit multiple values.
782
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000783 Returns the index of the item added, e.g. the total number of enqueued items
784 up to now.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000785 """
maruel@chromium.org831958f2013-01-22 15:01:46 +0000786 assert isinstance(priority, int)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000787 assert callable(func)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000788 with self._ready_lock:
789 start_new_worker = not self._ready
790 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000791 self._num_of_added_tasks += 1
792 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000793 self.tasks.put((priority, index, func, args, kwargs))
794 if start_new_worker:
795 self._add_worker()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000796 return index
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000797
798 def _run(self):
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000799 """Worker thread loop. Runs until a None task is queued."""
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000800 while True:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000801 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000802 with self._ready_lock:
803 self._ready += 1
804 task = self.tasks.get()
805 finally:
806 with self._ready_lock:
807 self._ready -= 1
808 try:
809 if task is None:
810 # We're done.
811 return
812 _priority, _index, func, args, kwargs = task
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000813 if inspect.isgeneratorfunction(func):
814 for out in func(*args, **kwargs):
815 self._output_append(out)
816 else:
817 out = func(*args, **kwargs)
818 self._output_append(out)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000819 except Exception as e:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000820 logging.warning('Caught exception: %s', e)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000821 exc_info = sys.exc_info()
maruel@chromium.org97cd0be2013-03-13 14:01:36 +0000822 logging.info(''.join(traceback.format_tb(exc_info[2])))
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000823 self._outputs_exceptions_cond.acquire()
824 try:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000825 self._exceptions.append(exc_info)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000826 self._outputs_exceptions_cond.notifyAll()
827 finally:
828 self._outputs_exceptions_cond.release()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000829 finally:
csharp@chromium.org60991182013-03-18 13:44:17 +0000830 try:
831 self.tasks.task_done()
832 except Exception as e:
833 # We need to catch and log this error here because this is the root
834 # function for the thread, nothing higher will catch the error.
835 logging.exception('Caught exception while marking task as done: %s',
836 e)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000837
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000838 def _output_append(self, out):
839 if out is not None:
840 self._outputs_exceptions_cond.acquire()
841 try:
842 self._outputs.append(out)
843 self._outputs_exceptions_cond.notifyAll()
844 finally:
845 self._outputs_exceptions_cond.release()
846
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000847 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000848 """Extracts all the results from each threads unordered.
849
850 Call repeatedly to extract all the exceptions if desired.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000851
852 Note: will wait for all work items to be done before returning an exception.
853 To get an exception early, use get_one_result().
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000854 """
855 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000856 self.tasks.join()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000857 self._outputs_exceptions_cond.acquire()
858 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000859 if self._exceptions:
860 e = self._exceptions.pop(0)
861 raise e[0], e[1], e[2]
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000862 out = self._outputs
863 self._outputs = []
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000864 finally:
865 self._outputs_exceptions_cond.release()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000866 return out
867
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000868 def get_one_result(self):
869 """Returns the next item that was generated or raises an exception if one
870 occured.
871
872 Warning: this function will hang if there is no work item left. Use join
873 instead.
874 """
875 self._outputs_exceptions_cond.acquire()
876 try:
877 while True:
878 if self._exceptions:
879 e = self._exceptions.pop(0)
880 raise e[0], e[1], e[2]
881 if self._outputs:
882 return self._outputs.pop(0)
883 self._outputs_exceptions_cond.wait()
884 finally:
885 self._outputs_exceptions_cond.release()
886
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000887 def close(self):
888 """Closes all the threads."""
889 for _ in range(len(self._workers)):
890 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000891 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000892 for t in self._workers:
893 t.join()
894
895 def __enter__(self):
896 """Enables 'with' statement."""
897 return self
898
maruel@chromium.org97cd0be2013-03-13 14:01:36 +0000899 def __exit__(self, _exc_type, _exc_value, _traceback):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000900 """Enables 'with' statement."""
901 self.close()
902
903
def valid_file(filepath, size):
  """Tells whether the file at |filepath| looks valid.

  The only check done for now is comparing the size on disk against |size|.
  When |size| is UNKNOWN_FILE_SIZE the file is accepted without being
  stat'ed.
  """
  if size != UNKNOWN_FILE_SIZE:
    size_on_disk = os.stat(filepath).st_size
    if size_on_disk != size:
      logging.warning(
          'Found invalid item %s; %d != %d',
          os.path.basename(filepath), size_on_disk, size)
      return False
  return True
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000916
917
class Profiler(object):
  """Context manager that logs the wall-clock time spent inside its body.

  |name| identifies the profiled section in the log line.
  """

  def __init__(self, name):
    # Set by __enter__(); None until the section is entered.
    self.start_time = None
    self.name = name

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    # Returns None, so an exception raised in the body keeps propagating.
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
931
932
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priority of items is equals, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The low 8 bits are reserved to count the retries, see
  # _task_executer().
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    # Function to fetch a remote object or upload to a remote location.
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = ThreadPool(self.INITIAL_WORKERS, self.MAX_WORKERS, 0)

  def join(self):
    """Blocks until the queue is empty."""
    return self._pool.join()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first.

    Thread-safe.
    """
    # Callers must use LOW/MED/HIGH; the low bits belong to the retry counter.
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Enqueues the transfer without checking the internal priority bits."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns the next result of the pool, see ThreadPool.get_one_result()."""
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success. On IOError the item is enqueued again with
    |priority| + 1 and None is returned; once RETRIES attempts were made the
    IOError is re-raised.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Retry a few times, lowering the priority.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a object to retrieve objects from a remote.

    When |file_or_url| is an http(s) url, the returned function downloads and
    zlib-decompresses the item; otherwise it copies the item from the
    |file_or_url| directory. Both take (item, dest).
    """
    if re.match(r'^https?://.+$', file_or_url):
      def download_file(item, dest):
        # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
        # easy.
        # Bound before the try block so the except handlers can always use
        # them.
        chunk = ''
        size = 0
        try:
          zipped_source = file_or_url + item
          logging.debug('download_file(%s)', zipped_source)

          # Because the app engine DB is only eventually consistent, retry
          # 404 errors because the file might just not be visible yet (even
          # though it has been uploaded).
          connection = url_open(zipped_source, retry_404=True)
          if not connection:
            raise IOError('Unable to open connection to %s' % zipped_source)
          decompressor = zlib.decompressobj()
          with open(dest, 'wb') as f:
            while True:
              chunk = connection.read(ZIPPED_FILE_CHUNK)
              if not chunk:
                break
              size += len(chunk)
              f.write(decompressor.decompress(chunk))
          # Ensure that all the data was properly decompressed.
          uncompressed_data = decompressor.flush()
          assert not uncompressed_data
        except IOError:
          logging.error('Encountered an exception with (%s, %s)', item, dest)
          raise
        except httplib.HTTPException as e:
          # Converted to IOError so _task_executer() retries it.
          raise IOError('Encountered an HTTPException.\n%s' % e)
        except zlib.error as e:
          # Log the first bytes of the last chunk read to see if it's
          # uncompressed data.
          logging.warning('%r', chunk[:512])
          raise IOError(
              'Problem unzipping data for item %s. Got %d bytes.\n%s' %
              (item, size, e))

      return download_file

    def copy_file(item, dest):
      source = os.path.join(file_or_url, item)
      if source == dest:
        logging.info('Source and destination are the same, no action required')
        return
      logging.debug('copy_file(%s, %s)', source, dest)
      shutil.copy(source, dest)
    return copy_file
1052
1053
class CachePolicies(object):
  """Plain holder for the limits used when trimming the cache."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Size above which the cache gets trimmed. 0 means the
                      cache is effectively a leak.
    - min_free_space: Free disk space under which the cache gets trimmed. 0
                      means it unconditionally fills the disk.
    - max_items: Upper bound on the number of items kept in the cache. 0 means
                 no limit is enforced.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
1068
1069
class NoCache(object):
  """Drop-in replacement for the Cache class that does not cache anything.

  Files are downloaded straight into the target directory and are then moved
  to where they are needed.
  """

  def __init__(self, target_directory, remote):
    self.remote = remote
    self.target_directory = target_directory

  def retrieve(self, priority, item, size):
    """Fetches |item| and blocks until the remote reports one result."""
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Downloads the first key of |items| if it is missing, then returns it."""
    item = next(items.iterkeys())
    destination = self.path(item)
    if os.path.exists(destination):
      return item

    self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
    downloaded = self.remote.get_one_result()
    assert downloaded == item
    return item

  def path(self, item):
    """Returns where |item| lives inside the target directory."""
    return os.path.join(self.target_directory, item)
1098
1099
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    self._state_need_to_be_saved = False
    # A lookup map to speed up searching.
    self._lookup = {}
    self._lookup_is_stale = True

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    with Profiler('Setup'):
      if not os.path.isdir(self.cache_dir):
        os.makedirs(self.cache_dir)
      if os.path.isfile(self.state_file):
        try:
          with open(self.state_file, 'r') as f:
            self.state = json.load(f)
        except (IOError, ValueError) as e:
          # Too bad. The file will be overwritten and the cache cleared.
          logging.error(
              'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
          self._state_need_to_be_saved = True
        if (not isinstance(self.state, list) or
            not all(
              isinstance(i, (list, tuple)) and len(i) == 2
              for i in self.state)):
          # Discard.
          self._state_need_to_be_saved = True
          self.state = []

      # Ensure that all files listed in the state still exist and add new ones.
      previous = set(filename for filename, _ in self.state)
      if len(previous) != len(self.state):
        logging.warning('Cache state is corrupted, found duplicate files')
        self._state_need_to_be_saved = True
        self.state = []
        # The state was thrown away, so nothing is known anymore: let the
        # scan below pick the files on disk up again as untracked files.
        previous = set()

      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        if not RE_IS_SHA1.match(filename):
          logging.warning('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
          continue
        # Insert as the oldest file. It will be deleted eventually if not
        # accessed.
        self._add(filename, False)
        logging.warning('Add unknown file %s to cache', filename)
        added += 1

      if added:
        logging.warning('Added back %d unknown files', added)
      if previous:
        # What is left in |previous| is listed in the state but not on disk.
        logging.warning('Removed %d lost files', len(previous))
        # Set explicitly in case self._add() wasn't called.
        self._state_need_to_be_saved = True
        # Filter out entries that were not found while keeping the previous
        # order.
        self.state = [
            (filename, size) for filename, size in self.state
            if filename not in previous
        ]
      self.trim()

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Trims the cache and logs the statistics of the session."""
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%5d (%8dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('       %8dkb free', self._free_disk / 1024)

  def remove_file_at_index(self, index):
    """Removes the file at the given index.

    The entry is dropped from the state before the file is deleted; a failure
    to delete the file is only logged.
    """
    try:
      self._state_need_to_be_saved = True
      filename, size = self.state.pop(index)
      # If the lookup was already stale, its possible the filename was not
      # present yet.
      self._lookup_is_stale = True
      self._lookup.pop(filename, None)
      self._removed.append(size)
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)

  def remove_lru_file(self):
    """Removes the last recently used file."""
    self.remove_file_at_index(0)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    # Ensure maximum cache size.
    if self.policies.max_cache_size and self.state:
      # Keep a running total instead of summing the whole state again after
      # each removal.
      total_size = sum(i[1] for i in self.state)
      while self.state and total_size > self.policies.max_cache_size:
        total_size -= self.state[0][1]
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)

    self.save()

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verify that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.
    """
    assert '/' not in item
    path = self.path(item)
    self._update_lookup()
    index = self._lookup.get(item)

    if index is not None:
      if not valid_file(path, size):
        self.remove_file_at_index(index)
        index = None
      else:
        assert index < len(self.state)
        # Was already in cache. Update it's LRU value by putting it at the end.
        self._state_need_to_be_saved = True
        self._lookup_is_stale = True
        self.state.append(self.state.pop(index))

    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.add_item(priority, item, path, size)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    self._update_lookup()
    if obj not in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering."""
    if self._state_need_to_be_saved:
      # The compact separators emit no newline, so text mode writes the same
      # bytes as binary mode; the file is closed (and flushed) on exit.
      with open(self.state_file, 'w') as f:
        json.dump(self.state, f, separators=(',', ':'))
      self._state_need_to_be_saved = False

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    self._update_lookup()
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_one_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    self._state_need_to_be_saved = True
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self._lookup_is_stale = True
      self.state.insert(0, (item, size))

  def _update_lookup(self):
    """Rebuilds the filename -> index map if it was marked as stale."""
    if self._lookup_is_stale:
      self._lookup = dict(
          (filename, index) for index, (filename, _) in enumerate(self.state))
      self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001343
1344
class IsolatedFile(object):
  """One parsed .isolated file, i.e. a node of the 'includes' tree."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Becomes True once everything on the left-side of the tree is parsed.
    # 'Tree' here means the .isolate and all the .isolated files recursively
    # included by it with 'includes' key. The order of each sha-1 in
    # 'includes', each representing a .isolated file in the hash table, is
    # important, as the later ones are not processed until the firsts are
    # retrieved and read.
    self.can_fetch = False
    # Becomes True once the .isolated file is loaded.
    self._is_parsed = False
    # Becomes True once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Checks that the .isolated file is valid, then fills this object with
    its json data and creates one child per 'includes' entry.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included = self.data.get('includes', [])
    self.children = [IsolatedFile(child_hash) for child_hash in included]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s' % filepath)
      cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1399
1400
class Settings(object):
  """Results of a completely parsed .isolated file.

  Attributes are filled in by load(): |command|, |read_only| and
  |relative_cwd| take the first value found while walking the tree of
  .isolated files, and |files| accumulates every file to map.
  """

  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Arguments:
      cache: object providing retrieve(), wait_for() and path().
      root_isolated_hash: hash of the root .isolated file.

    Raises:
      ConfigError: if an .isolated file hash is requested more than once while
          walking the includes.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Hashes of every .isolated file requested so far, used to refuse
    # recursive includes. Each newly requested hash must be recorded here,
    # otherwise only an include of the root itself would ever be detected.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node processed that defines a property
      # wins; later nodes never override it.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetchable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file deterministically instead of relying on garbage
      # collection to release the handle.
      with open(cache.path(item_hash), 'r') as f:
        content = f.read()
      item.load(content)
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    # Sanity check: every node of the tree must have had its files requested.
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1482
1483
def create_directories(base_directory, files):
  """Creates under |base_directory| every directory needed by |files|.

  |files| is an iterable of relative file paths supporting len(). The parent
  directory of each path, and each of its ancestors, is created with
  os.mkdir(), parents first.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect each file's parent directory along with all of its ancestors.
  needed = set()
  for filepath in files:
    parent = os.path.dirname(filepath)
    while parent:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # Sorting guarantees a parent is created before any of its children.
  for relative_dir in sorted(needed):
    os.mkdir(os.path.join(base_directory, relative_dir))
1496
1497
def create_links(base_directory, files):
  """Creates the symlinks described by |files| under |base_directory|.

  |files| is an iterable of (filepath, properties) pairs. Only entries with
  an 'l' property (the link target) are processed. On win32 such entries are
  skipped with a warning.
  """
  # os.lchmod is not available on every platform; look it up once.
  set_link_mode = getattr(os, 'lchmod', None)
  for filepath, properties in files:
    if 'l' in properties:
      if sys.platform == 'win32':
        # TODO(maruel): Create junctions or empty text files similar to what
        # cygwin do?
        logging.warning('Ignoring symlink %s', filepath)
      else:
        link_path = os.path.join(base_directory, filepath)
        # symlink doesn't exist on Windows. So the 'link' property should
        # never be specified for windows .isolated file.
        os.symlink(properties['l'], link_path)  # pylint: disable=E1101
        if 'm' in properties and set_link_mode:
          set_link_mode(link_path, properties['m'])
1516
1517
def setup_commands(base_directory, cwd, cmd):
  """Returns the (working directory, command) pair needed to run the test.

  |cwd| must be relative; it is joined to |base_directory| and created if it
  is not already a directory. |cmd| is modified in place: its first item has
  '/' replaced by the platform path separator, then the list is passed
  through fix_python_path().
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  work_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(work_dir):
    os.makedirs(work_dir)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0].replace('/', os.path.sep)
  cmd[0] = executable
  return work_dir, fix_python_path(cmd)
1532
1533
def generate_remaining_files(files):
  """Groups the files that still have to be downloaded by content hash.

  |files| is an iterable of (filepath, properties) pairs. Entries without an
  'h' property are ignored. Returns a dict mapping each hash to the list of
  (filepath, properties) pairs using it, in iteration order.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))
  return by_hash
1542
1543
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies of |isolated_hash| into |target_directory|.

  The directory is created if missing. Nothing is executed; the command and
  working directory needed to run the test are printed instead.

  Returns 0 on success, or 1 when the .isolated tree defines no command.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds'):
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    print >> sys.stderr, 'No command to run'
    return 1

  with Profiler('GetRest'):
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      digest = no_cache.wait_for(remaining)
      mappings = remaining.pop(digest)
      last_index = len(mappings) - 1

      for index, (filepath, properties) in enumerate(mappings):
        source = no_cache.path(digest)
        destination = os.path.join(target_directory, filepath)
        logging.info(source)

        # The downloaded item is copied to every destination but the last
        # one, where it is moved instead.
        if index == last_index:
          os.rename(source, destination)
        else:
          shutil.copyfile(source, destination)

        if sys.platform != 'win32' and 'm' in properties:
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in that
          # case.
          os.chmod(destination, properties['m'])

      elapsed = time.time() - last_update
      if elapsed > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...' % len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1601
1602
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
    isolated_hash: hash of the root .isolated file, or the path to a local
        .isolated file when the value does not match RE_IS_SHA1.
    cache_dir: directory of the local cache; the temporary output directory is
        created from it through make_temp_dir().
    remote: passed to Remote() to retrieve missing items.
    policies: cache policies passed to Cache().

  Returns:
    The exit code of the command, or 1 if the .isolated tree defines no
    command.

  Raises:
    OSError: re-raised when the command could not be started.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # Close the file deterministically once its content is hashed
          # instead of leaking the handle.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...', len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s', cmd, cwd)

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      # The temporary tree is removed on every exit path, including the early
      # 'No command to run' return and any exception.
      rmtree(outdir)
1667
1668
def main():
  """Parses the command line, sets up logging and downloads or runs the test.

  Exactly one of --isolated or --hash must be given. With --download the
  files are only downloaded to the given directory, otherwise the test is run
  from a temporary directory.

  Returns the process exit code.
  """
  disable_buffering()
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  download_group = optparse.OptionGroup(parser, 'Download')
  download_group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(download_group)

  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  source_group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  source_group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  parser.add_option_group(source_group)

  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  cache_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  cache_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  cache_group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(cache_group)

  options, args = parser.parse_args()

  # Console verbosity follows the number of -v flags, capped at DEBUG.
  console_levels = (logging.ERROR, logging.INFO, logging.DEBUG)
  level = console_levels[min(2, options.verbose)]
  root_logger = logging.getLogger()

  console_handler = logging.StreamHandler()
  console_handler.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  console_handler.setLevel(level)
  root_logger.addHandler(console_handler)

  # The rotating log file always records everything down to DEBUG.
  file_handler = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  file_handler.setLevel(logging.DEBUG)
  file_handler.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  root_logger.addHandler(file_handler)

  root_logger.setLevel(logging.DEBUG)

  # Usage errors are logged before parser.error() reports them and exits.
  if bool(options.isolated) == bool(options.hash):
    logging.debug('One and only one of --isolated or --hash is required.')
    parser.error('One and only one of --isolated or --hash is required.')
  if args:
    logging.debug('Unsupported args %s' % ' '.join(args))
    parser.error('Unsupported args %s' % ' '.join(args))

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  source = options.isolated or options.hash
  if options.download:
    return download_test_data(source, options.download, options.remote)

  try:
    return run_tha_test(source, options.cache, options.remote, policies)
  except Exception as e:
    # Make sure any exception is logged.
    logging.exception(e)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001771
1772
if __name__ == '__main__':
  # Ensure that we are always running with the correct encoding. This is done
  # before main() is called; main()'s return value becomes the exit code.
  fix_default_encoding()
  sys.exit(main())
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001776 sys.exit(main())