blob: 7fb301e3d5975e60e335dbdd0a056a0b8677ff01 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
13import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000014import httplib
maruel@chromium.orgedd25d02013-03-26 14:38:00 +000015import inspect
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000016import json
csharp@chromium.orgbfb98742013-03-26 20:28:36 +000017import locale
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000018import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000019import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000020import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000021import optparse
22import os
23import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000024import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000025import re
26import shutil
27import stat
28import subprocess
29import sys
30import tempfile
31import threading
32import time
maruel@chromium.org97cd0be2013-03-13 14:01:36 +000033import traceback
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000034import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000035import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000036import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000037import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000038
# The 'upload' module is used by AppEngineService for authentication. When it
# cannot be imported, app engine authentication support is disabled.
try:
  from third_party import upload
except ImportError:
  upload = None
else:
  # Replace upload's logging with a dedicated logger so that its
  # logging.info() output is filtered out.
  upload.logging = logging.getLogger('upload')
  # Mac pylint chokes on this line.
  upload.logging.setLevel(logging.WARNING)  # pylint: disable=E1103


# Actions understood by link_file().
HARDLINK, SYMLINK, COPY = 1, 2, 3

# Matches a hex encoded sha-1 digest.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# Placeholder size for files whose real size is not known, generally .isolated
# files.
UNKNOWN_FILE_SIZE = None

# Number of bytes read per chunk when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# File name of this script's log.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# Directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Log file used for the run_test_cases.py command.
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# Delay (in seconds) between two logging statements while the required files
# are retrieved. It lets the user (or buildbot) know that the program is still
# running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# Name of the request parameter that carries the url open attempt count.
COUNT_KEY = 'UrlOpenAttempt'

# Maximum number of attempts at opening a url before aborting.
MAX_URL_OPEN_ATTEMPTS = 30

# Global (for now) map: server URL (http://example.com) -> HttpService
# instance. get_http_service() uses it as its cache, guarded by the lock below.
_http_services = {}
_http_services_lock = threading.Lock()

# sys.platform -> flavor name table used by get_flavor().
FLAVOR_MAPPING = {
  'cygwin': 'win',
  'win32': 'win',
  'darwin': 'mac',
  'sunos5': 'solaris',
  'freebsd7': 'freebsd',
  'freebsd8': 'freebsd',
}
97
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000098
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be parsed or fails validation."""
102
103
class MappingError(OSError):
  """Raised when the file tree cannot be recreated."""
107
108
def get_flavor():
  """Returns the flavor name of the current platform.

  sys.platform is looked up in FLAVOR_MAPPING; any platform that is not listed
  there is reported as 'linux'. Copied from gyp/pylib/gyp/common.py.
  """
  platform = sys.platform
  if platform in FLAVOR_MAPPING:
    return FLAVOR_MAPPING[platform]
  return 'linux'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000112
113
def fix_default_encoding():
  """Switches the default encoding and the locale categories to utf-8.

  By default the python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Returns False when the default encoding was already utf-8 (nothing is
  touched in that case), True otherwise.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Reloading sys regenerates setdefaultencoding().
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  # pylint: disable=E1101
  sys.setdefaultencoding('utf-8')

  category_names = [name for name in dir(locale) if name[0:3] == 'LC_']
  for name in category_names:
    category = getattr(locale, name)
    try:
      locale.setlocale(category, '')
    except locale.Error:
      continue
    try:
      language = locale.getlocale(category)[0]
    except (TypeError, ValueError):
      continue
    if not language:
      continue
    try:
      locale.setlocale(category, (language, 'UTF-8'))
    except locale.Error:
      # The utf-8 variant can't be set in-process; export it through the
      # environment instead.
      os.environ[name] = language + '.UTF-8'

  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    pass
  return True
152
153
class Unbuffered(object):
  """File object proxy that flushes the wrapped stream after each newline."""

  def __init__(self, stream):
    self.stream = stream

  def __getattr__(self, attr):
    # Anything that is not defined on the proxy is served by the wrapped
    # stream.
    return getattr(self.stream, attr)

  def write(self, data):
    """Writes |data| to the stream; flushes when it contains a newline."""
    self.stream.write(data)
    if '\n' not in data:
      return
    self.stream.flush()
166
167
def disable_buffering():
  """Makes stdout of this process and of its child processes unbuffered.

  Does nothing when the PYTHONUNBUFFERED environment variable is already set
  to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...; wrap the whole object instead.
  sys.stdout = Unbuffered(sys.stdout)
  # Exported through the environment so that child processes see it too.
  os.environ['PYTHONUNBUFFERED'] = 'x'
175
176
def os_link(source, link_name):
  """Creates the hard link |link_name| pointing to |source|.

  Adds support for os.link() on Windows, where the link is created through
  kernel32's CreateHardLinkW().

  Raises:
    OSError: the link could not be created. On Windows this is the
        WindowsError (an OSError subclass) built by ctypes.WinError() from
        GetLastError(), so the failure reason is not lost.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return

  if not ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0):
    # CreateHardLinkW() returns 0 on failure.
    raise ctypes.WinError()
185
186
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and makes the copy readable by everyone."""
  shutil.copy(infile, outfile)
  read_bits = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  os.chmod(outfile, os.stat(outfile).st_mode | read_bits)
193
194
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile|; |action| selects how.

  |action| is one of HARDLINK, SYMLINK or COPY. On Windows, SYMLINK is handled
  like HARDLINK. A hardlink that fails with OSError falls back to a copy.

  Raises:
    ValueError: |action| is not a known action.
    MappingError: |infile| is not a file or |outfile| already exists.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    sizes = (outfile, os.stat(infile).st_size, os.stat(outfile).st_size)
    raise MappingError('%s already exist; insize:%d; outsize:%d' % sizes)

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    # On windows, symlink are converted to hardlink and fails over to copy.
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    message = 'Failed to hardlink, failing back to copy %s to %s' % (
        infile, outfile)
    logging.warning(message)
    readable_copy(outfile, infile)
221
222
223def _set_write_bit(path, read_only):
224 """Sets or resets the executable bit on a file or directory."""
225 mode = os.lstat(path).st_mode
226 if read_only:
227 mode = mode & 0500
228 else:
229 mode = mode | 0200
230 if hasattr(os, 'lchmod'):
231 os.lchmod(path, mode) # pylint: disable=E1101
232 else:
233 if stat.S_ISLNK(mode):
234 # Skip symlink without lchmod() support.
235 logging.debug('Can\'t change +w bit on symlink %s' % path)
236 return
237
238 # TODO(maruel): Implement proper DACL modification on Windows.
239 os.chmod(path, mode)
240
241
def make_writable(root, read_only):
  """Toggles the writable bit on everything below the directory |root|.

  |root| must be an absolute path. Within each directory visited, the files
  are processed before the sub directories. |root| itself is not modified.
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    for entry in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, entry), read_only)
251
252
def rmtree(root):
  """Wrapper around shutil.rmtree() to retry automatically on Windows.

  The tree is made writable first. On Windows, up to three attempts are made,
  sleeping 2, 4 then 6 seconds after each WindowsError; the last failure is
  not re-raised. On other platforms errors propagate from shutil.rmtree().
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in range(3):
    try:
      shutil.rmtree(root)
    except WindowsError:  # pylint: disable=E0602
      delay = (attempt + 1) * 2
      print >> sys.stderr, (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      time.sleep(delay)
    else:
      break
268
269
def is_same_filesystem(path1, path2):
  """Returns True if both paths are on the same filesystem.

  This is required to enable the use of hardlinks. Both paths must be
  absolute.
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    if path1[0].lower() != path2[0].lower():
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
286
287
def get_free_space(path):
  """Returns the number of free bytes on the filesystem holding |path|."""
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  free_bytes = ctypes.c_ulonglong(0)
  # The last argument of GetDiskFreeSpaceExW() is filled in with the result.
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
298
299
def make_temp_dir(prefix, root_dir):
  """Returns a temporary directory on the same file system as root_dir.

  The system temporary directory is used when it lives on the same file system
  as |root_dir|; otherwise the directory is created next to |root_dir|.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
306
307
def load_isolated(content):
  """Verifies the .isolated file is valid and returns the decoded json data.

  |content| is the raw json text of the .isolated file. The accepted top level
  keys are 'command', 'files', 'includes', 'os', 'read_only' and
  'relative_cwd'. Each entry of 'files' may carry 'h' (sha-1), 'l' (link),
  'm' (int) and 's' (int) properties, 'h' and 'l' being mutually exclusive.

  Raises:
    ConfigError: |content| is not valid json, is not a dict, or any key or
        value fails validation.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  def is_sha1(candidate):
    # The type is checked first: matching a regexp against a non-string (the
    # json data is not trusted) raises TypeError instead of ConfigError.
    return (
        isinstance(candidate, basestring) and
        bool(RE_IS_SHA1.match(candidate)))

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_sha1(subsubvalue):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_sha1(subvalue):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      # A .isolated file is only usable on the flavor it was generated for.
      expected_flavor = get_flavor()
      if value != expected_flavor:
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (expected_flavor, value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
385
386
def fix_python_path(cmd):
  """Returns the fixed command line to call the right python executable.

  A leading 'python' is replaced by sys.executable, and sys.executable is
  prepended to a command that starts with a .py script. |cmd| itself is never
  modified; a new list is returned.
  """
  program = cmd[0]
  if program == 'python':
    return [sys.executable] + cmd[1:]
  if program.endswith('.py'):
    return [sys.executable] + cmd
  return cmd[:]
395
396
def url_open(url, data=None, retry_404=False, content_type=None):
  """Attempts to open the given url multiple times.

  The url is split into its server part (scheme and network location), which
  selects the HttpService to use, and the remainder, which is the request sent
  to that service.

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns a file-like object, where the response may be read from, or None
  if it was unable to connect.
  """
  parsed = urlparse.urlparse(url)
  server_url = '%s://%s' % (parsed[0], parsed[1])
  # Everything but the scheme and the network location.
  request_url = urlparse.urlunparse(['', ''] + list(parsed[2:]))
  return get_http_service(server_url).request(
      request_url, data, retry_404, content_type)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000414
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000415
def get_http_service(url):
  """Returns the HttpService instance to use to send requests to the given
  base url, creating and caching an AppEngineService on first use.
  """
  with _http_services_lock:
    if not _http_services.get(url):
      _http_services[url] = AppEngineService(url)
    return _http_services[url]
426
427
class HttpService(object):
  """Base class for a class that provides an API to HTTP based service:
    - Provides 'request' method.
    - Supports automatic request retries.
    - Supports persistent cookies.
    - Thread safe.
  """

  # File to use to store all auth cookies.
  COOKIE_FILE = os.path.join('~', '.isolated_cookies')

  # CookieJar reused by all services + lock that protects its instantiation.
  _cookie_jar = None
  _cookie_jar_lock = threading.Lock()

  def __init__(self, url):
    self.url = str(url.rstrip('/'))
    self.cookie_jar = self.load_cookie_jar()
    self.opener = self.create_url_opener()

  def authenticate(self):  # pylint: disable=R0201
    """Called when the HTTP server asks the client to authenticate.

    Returns True when authentication succeeded. This base implementation
    always returns False; subclasses can override it.
    """
    return False

  @staticmethod
  def load_cookie_jar():
    """Returns the global cookie jar that stores cookies in COOKIE_FILE.

    The jar is created and loaded on the first call only.
    """
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is None:
        jar = ThreadSafeCookieJar(os.path.expanduser(HttpService.COOKIE_FILE))
        jar.load()
        HttpService._cookie_jar = jar
      return HttpService._cookie_jar

  @staticmethod
  def save_cookie_jar():
    """Called when the cookie jar needs to be flushed to disk."""
    with HttpService._cookie_jar_lock:
      jar = HttpService._cookie_jar
      if jar is not None:
        jar.save()

  def create_url_opener(self):  # pylint: disable=R0201
    """Returns the OpenerDirector that will be used when sending requests.

    Can be reimplemented in subclasses.
    """
    cookie_processor = urllib2.HTTPCookieProcessor(self.cookie_jar)
    return urllib2.build_opener(cookie_processor)

  def request(self, url, data=None, retry_404=False, content_type=None):
    """Attempts to open the given url multiple times.

    |url| is relative to the server root, i.e. '/some/request?param=1'.

    |data| can be either:
      -None for a GET request
      -str for pre-encoded data
      -list for data to be encoded
      -dict for data to be encoded (COUNT_KEY will be added in this case)

    Returns a file-like object, where the response may be read from, or None
    if it was unable to connect.
    """
    assert url and url[0] == '/'

    if isinstance(data, dict) and COUNT_KEY in data:
      logging.error('%s already existed in the data passed into UlrOpen. It '
                    'would be overwritten. Aborting UrlOpen', COUNT_KEY)
      return None

    is_post = data is not None
    assert is_post or not content_type, 'Can\'t use content_type on GET'

    # Pre-encoded (or absent) data is sent as is; the retry parameters then
    # travel in the query part of the url instead of in the body.
    preencoded = data is None or isinstance(data, str)

    def make_request(extra):
      """Returns a urllib2.Request instance for this specific retry."""
      target_url = urlparse.urljoin(self.url, url.lstrip('/'))
      if preencoded:
        payload = data
        url_parts = list(urlparse.urlparse(target_url))
        # Append the query parameter.
        if url_parts[4] and extra:
          url_parts[4] += '&'
        url_parts[4] += urllib.urlencode(extra)
        target_url = urlparse.urlunparse(url_parts)
      else:
        fields = data.items() if isinstance(data, dict) else data[:]
        fields.extend(extra.iteritems())
        payload = urllib.urlencode(fields)

      http_request = urllib2.Request(target_url, data=payload)
      if payload is not None:
        if content_type:
          http_request.add_header('Content-Type', content_type)
        http_request.add_header('Content-Length', len(payload))
      return http_request

    return self._retry_loop(make_request, retry_404)

  def _retry_loop(self, make_request, retry_404=False):
    """Runs internal request-retry loop.

    |make_request| is called before each attempt with a dict of extra
    parameters: empty on the first attempt, {COUNT_KEY: attempt} afterwards.

    Returns the response object, or None when the request was rejected or all
    MAX_URL_OPEN_ATTEMPTS attempts failed.
    """
    authenticated = False
    last_error = None
    final_attempt = MAX_URL_OPEN_ATTEMPTS - 1
    for attempt in range(MAX_URL_OPEN_ATTEMPTS):
      if attempt:
        extra = {COUNT_KEY: attempt}
      else:
        extra = {}
      request = make_request(extra)
      try:
        url_response = self._url_open(request)
        logging.debug('url_open(%s) succeeded', request.get_full_url())
        return url_response
      except urllib2.HTTPError as e:
        # Unauthorized. Ask to authenticate and then try again.
        if e.code in (401, 403):
          # Try to authenticate only once. If it doesn't help, then server does
          # not support app engine authentication.
          logging.debug('Got %s', e.code)
          if not authenticated and self.authenticate():
            authenticated = True
            # Retry right away, without sleeping.
            continue
          logging.error(
              'Unable to authenticate to %s.\n%s',
              request.get_full_url(), self._format_exception(e, verbose=True))
          return None

        retriable = e.code >= 500 or (retry_404 and e.code == 404)
        if not retriable:
          # This HTTPError means we reached the server and there was a problem
          # with the request, so don't retry.
          logging.error(
              'Able to connect to %s but an exception was thrown.\n%s',
              request.get_full_url(), self._format_exception(e, verbose=True))
          return None

        # The HTTPError was due to a server error, so retry the attempt.
        logging.warning('Able to connect to %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      except (urllib2.URLError, httplib.HTTPException) as e:
        logging.warning('Unable to open url %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      # Only sleep if we are going to try again.
      if attempt < final_attempt:
        self._sleep_before_retry(attempt)

    logging.error('Unable to open given url, %s, after %d attempts.\n%s',
                  request.get_full_url(), MAX_URL_OPEN_ATTEMPTS,
                  self._format_exception(last_error, verbose=True))
    return None

  def _url_open(self, request):
    """Low level method to execute urllib2.Request's.

    To be mocked in tests.
    """
    return self.opener.open(request)

  def _sleep_before_retry(self, attempt):  # pylint: disable=R0201
    """Sleeps for some amount of time when retrying the request.

    The delay is a random value in [0, 3) plus 1.5 ** (attempt + 1) seconds,
    clamped to the [0.1, 20] range. To be mocked in tests.
    """
    jitter = random.random() * 3
    backoff = math.pow(1.5, (attempt + 1))
    time.sleep(min(20, max(0.1, jitter + backoff)))

  @staticmethod
  def _format_exception(exc, verbose=False):
    """Given an instance of some exception raised by urlopen returns human
    readable piece of text with detailed information about the error.

    With |verbose| set, an HTTPError is expanded with its headers (the ones
    starting with 'x-' are left out) and its body.
    """
    lines = ['Exception: %s' % (exc,)]
    if not (verbose and isinstance(exc, urllib2.HTTPError)):
      return '\n'.join(lines)

    separator = '-' * 10
    lines.append(separator)
    if exc.hdrs:
      lines.extend(
          '%s: %s' % (header.capitalize(), value)
          for header, value in exc.hdrs.items()
          if not header.startswith('x-'))
      lines.append('')
    lines.append(exc.read() or '<empty body>')
    lines.append(separator)
    return '\n'.join(lines)
615
maruel@chromium.orgef333122013-03-12 20:36:40 +0000616
class AppEngineService(HttpService):
  """HttpService with authentication support for app engine based services.

  Authentication is delegated to the optional 'upload' module.
  """

  # This lock ensures that user won't be confused with multiple concurrent
  # login prompts.
  _auth_lock = threading.Lock()

  def __init__(self, url, email=None, password=None):
    super(AppEngineService, self).__init__(url)
    self.email = email
    self.password = password
    # Created on demand by get_credentials().
    self._keyring = None

  def authenticate(self):
    """Authenticates in the app engine application.

    Returns True on success. Returns False right away when the 'upload' module
    is not available.
    """
    if not upload:
      logging.warning('\'upload\' module is missing, '
                      'app engine authentication is disabled.')
      return False

    # Captured by the nested class below, where 'self' is the RPC server.
    cookie_jar = self.cookie_jar
    save_cookie_jar = self.save_cookie_jar

    # RPC server that uses AuthenticationSupport's cookie jar.
    class AuthServer(upload.AbstractRpcServer):
      def _GetOpener(self):
        # Authentication code needs to know about 302 response.
        # So make OpenerDirector without HTTPRedirectHandler.
        opener = urllib2.OpenerDirector()
        for handler_class in (
            urllib2.ProxyHandler,
            urllib2.UnknownHandler,
            urllib2.HTTPHandler,
            urllib2.HTTPDefaultErrorHandler,
            urllib2.HTTPSHandler,
            urllib2.HTTPErrorProcessor):
          opener.add_handler(handler_class())
        opener.add_handler(urllib2.HTTPCookieProcessor(cookie_jar))
        return opener

      def PerformAuthentication(self):
        self._Authenticate()
        save_cookie_jar()
        return self.authenticated

    with AppEngineService._auth_lock:
      return AuthServer(self.url, self.get_credentials).PerformAuthentication()

  def get_credentials(self):
    """Called during authentication process to get the credentials.

    May be called multiple times if authentication fails.
    Returns tuple (email, password).
    """
    # 'authenticate' calls this only if 'upload' is present.
    # Ensure other callers (if any) fail non-cryptically if 'upload' is missing.
    assert upload, '\'upload\' module is required for this to work'
    if self.email and self.password:
      return (self.email, self.password)
    if not self._keyring:
      self._keyring = upload.KeyringCreds(
          self.url, self.url.lower(), self.email)
    return self._keyring.GetUserCredentials()
679
680
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save.

  Both operations run while holding the jar's own cookies lock and are best
  effort: file system failures are logged instead of being raised.
  """

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists, creates it otherwise.

    In both cases the file mode is then reset to 0600, so the cookies are only
    accessible to the owner. |filename| defaults to self.filename.
    """
    filename = filename or self.filename
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(self, filename,
                                          ignore_discard,
                                          ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError):
          # An unreadable cookie file is not fatal, it only means that
          # authentication has to be done again.
          logging.warning('Failed to load cookies from %s', filename)
      else:
        try:
          # Create the file with restricted permissions right from the start.
          fd = os.open(filename, os.O_CREAT, 0o600)
          os.close(fd)
        except OSError:
          logging.error('Failed to create %s', filename)
      try:
        os.chmod(filename, 0o600)
      except OSError:
        logging.error('Failed to fix mode for %s', filename)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it.

    |filename| defaults to self.filename. Failures to write the file are
    logged, not raised.
    """
    filename = filename or self.filename
    logging.debug('Saving cookies to %s', filename)
    with self._cookies_lock:
      try:
        cookielib.MozillaCookieJar.save(self, filename,
                                        ignore_discard,
                                        ignore_expires)
      except (IOError, OSError):
        # The base class opens the file with open(), which reports failures
        # with IOError; it is not a subclass of OSError on python 2.
        logging.error('Failed to save %s', filename)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000717
718
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000719class ThreadPool(object):
720 """Implements a multithreaded worker pool oriented for mapping jobs with
721 thread-local result storage.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000722
723 Arguments:
724 - initial_threads: Number of threads to start immediately. Can be 0 if it is
725 uncertain that threads will be needed.
726 - max_threads: Maximum number of threads that will be started when all the
727 threads are busy working. Often the number of CPU cores.
728 - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
729 queue. A non-zero value may make add_task() blocking.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000730 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000731 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000732
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000733 def __init__(self, initial_threads, max_threads, queue_size):
734 logging.debug(
735 'ThreadPool(%d, %d, %d)', initial_threads, max_threads, queue_size)
736 assert initial_threads <= max_threads
737 # Update this check once 256 cores CPU are common.
738 assert max_threads <= 256
739
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000740 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000741 self._max_threads = max_threads
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000742
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000743 # Mutables.
744 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000745 self._num_of_added_tasks = 0
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000746 self._outputs_exceptions_cond = threading.Condition()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000747 self._outputs = []
748 self._exceptions = []
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000749 # Number of threads in wait state.
750 self._ready_lock = threading.Lock()
751 self._ready = 0
752 self._workers_lock = threading.Lock()
753 self._workers = []
754 for _ in range(initial_threads):
755 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000756
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000757 def _add_worker(self):
758 """Adds one worker thread if there isn't too many. Thread-safe."""
759 # Better to take the lock two times than hold it for too long.
760 with self._workers_lock:
761 if len(self._workers) >= self._max_threads:
762 return False
763 worker = threading.Thread(target=self._run)
764 with self._workers_lock:
765 if len(self._workers) >= self._max_threads:
766 return False
767 self._workers.append(worker)
768 worker.daemon = True
769 worker.start()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000770
maruel@chromium.org831958f2013-01-22 15:01:46 +0000771 def add_task(self, priority, func, *args, **kwargs):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000772 """Adds a task, a function to be executed by a worker.
773
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000774 |priority| can adjust the priority of the task versus others. Lower priority
maruel@chromium.org831958f2013-01-22 15:01:46 +0000775 takes precedence.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000776
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000777 |func| can either return a return value to be added to the output list or
778 be a generator which can emit multiple values.
779
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000780 Returns the index of the item added, e.g. the total number of enqueued items
781 up to now.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000782 """
maruel@chromium.org831958f2013-01-22 15:01:46 +0000783 assert isinstance(priority, int)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000784 assert callable(func)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000785 with self._ready_lock:
786 start_new_worker = not self._ready
787 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000788 self._num_of_added_tasks += 1
789 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000790 self.tasks.put((priority, index, func, args, kwargs))
791 if start_new_worker:
792 self._add_worker()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000793 return index
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000794
795 def _run(self):
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000796 """Worker thread loop. Runs until a None task is queued."""
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000797 while True:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000798 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000799 with self._ready_lock:
800 self._ready += 1
801 task = self.tasks.get()
802 finally:
803 with self._ready_lock:
804 self._ready -= 1
805 try:
806 if task is None:
807 # We're done.
808 return
809 _priority, _index, func, args, kwargs = task
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000810 if inspect.isgeneratorfunction(func):
811 for out in func(*args, **kwargs):
812 self._output_append(out)
813 else:
814 out = func(*args, **kwargs)
815 self._output_append(out)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000816 except Exception as e:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000817 logging.warning('Caught exception: %s', e)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000818 exc_info = sys.exc_info()
maruel@chromium.org97cd0be2013-03-13 14:01:36 +0000819 logging.info(''.join(traceback.format_tb(exc_info[2])))
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000820 self._outputs_exceptions_cond.acquire()
821 try:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000822 self._exceptions.append(exc_info)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000823 self._outputs_exceptions_cond.notifyAll()
824 finally:
825 self._outputs_exceptions_cond.release()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000826 finally:
csharp@chromium.org60991182013-03-18 13:44:17 +0000827 try:
828 self.tasks.task_done()
829 except Exception as e:
830 # We need to catch and log this error here because this is the root
831 # function for the thread, nothing higher will catch the error.
832 logging.exception('Caught exception while marking task as done: %s',
833 e)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000834
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000835 def _output_append(self, out):
836 if out is not None:
837 self._outputs_exceptions_cond.acquire()
838 try:
839 self._outputs.append(out)
840 self._outputs_exceptions_cond.notifyAll()
841 finally:
842 self._outputs_exceptions_cond.release()
843
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000844 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000845 """Extracts all the results from each threads unordered.
846
847 Call repeatedly to extract all the exceptions if desired.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000848
849 Note: will wait for all work items to be done before returning an exception.
850 To get an exception early, use get_one_result().
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000851 """
852 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000853 self.tasks.join()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000854 self._outputs_exceptions_cond.acquire()
855 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000856 if self._exceptions:
857 e = self._exceptions.pop(0)
858 raise e[0], e[1], e[2]
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000859 out = self._outputs
860 self._outputs = []
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000861 finally:
862 self._outputs_exceptions_cond.release()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000863 return out
864
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000865 def get_one_result(self):
866 """Returns the next item that was generated or raises an exception if one
867 occured.
868
869 Warning: this function will hang if there is no work item left. Use join
870 instead.
871 """
872 self._outputs_exceptions_cond.acquire()
873 try:
874 while True:
875 if self._exceptions:
876 e = self._exceptions.pop(0)
877 raise e[0], e[1], e[2]
878 if self._outputs:
879 return self._outputs.pop(0)
880 self._outputs_exceptions_cond.wait()
881 finally:
882 self._outputs_exceptions_cond.release()
883
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000884 def close(self):
885 """Closes all the threads."""
886 for _ in range(len(self._workers)):
887 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000888 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000889 for t in self._workers:
890 t.join()
891
892 def __enter__(self):
893 """Enables 'with' statement."""
894 return self
895
maruel@chromium.org97cd0be2013-03-13 14:01:36 +0000896 def __exit__(self, _exc_type, _exc_value, _traceback):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000897 """Enables 'with' statement."""
898 self.close()
899
900
def valid_file(filepath, size):
  """Tells whether the file at |filepath| looks valid.

  Only the size is verified for now. When |size| is UNKNOWN_FILE_SIZE, the file
  is not even stat'ed and is considered valid. A mismatch is logged as a
  warning and False is returned.
  """
  if size != UNKNOWN_FILE_SIZE:
    actual_size = os.stat(filepath).st_size
    if actual_size != size:
      logging.warning(
          'Found invalid item %s; %d != %d',
          os.path.basename(filepath), actual_size, size)
      return False
  return True
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000913
914
class Profiler(object):
  """Context manager logging the wall-clock time spent inside a 'with' block.

  |name| identifies the profiled section in the log line.
  """

  def __init__(self, name):
    # Set by __enter__(); None until the section is entered.
    self.start_time = None
    self.name = name

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    # Nothing is returned so an exception raised in the block propagates.
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
928
929
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priority of items is equals, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The low 8 bits are reserved to count retries.
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    # Function to fetch a remote object or upload to a remote location.
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = ThreadPool(self.INITIAL_WORKERS, self.MAX_WORKERS, 0)

  def join(self):
    """Blocks until the queue is empty.

    Returns whatever the underlying pool's join() returns.
    """
    return self._pool.join()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first. |priority| must not use the low
    bits reserved for the internal retry counter.

    Thread-safe.
    """
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Enqueues the transfer of |obj| to |dest| without checking |priority|."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns the next result of the pool; see ThreadPool.get_one_result()."""
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success. On IOError, the item is enqueued again with a
    priority increased by one and None is returned, unless the retry counter
    stored in the low bits of |priority| reached RETRIES, in which case the
    exception is re-raised.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Retry a few times, lowering the priority.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a object to retrieve objects from a remote.

    The returned function takes (item, dest). For an http(s) |file_or_url|, it
    downloads and zlib-decompresses |file_or_url| + item into |dest|; otherwise
    it copies the file from the |file_or_url| directory.
    """
    if re.match(r'^https?://.+$', file_or_url):
      def download_file(item, dest):
        # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
        # easy.
        # Defined before the try block so the zlib.error handler can always
        # report them.
        size = 0
        chunk = ''
        try:
          zipped_source = file_or_url + item
          logging.debug('download_file(%s)', zipped_source)

          # Because the app engine DB is only eventually consistent, retry
          # 404 errors because the file might just not be visible yet (even
          # though it has been uploaded).
          connection = url_open(zipped_source, retry_404=True)
          if not connection:
            raise IOError('Unable to open connection to %s' % zipped_source)
          decompressor = zlib.decompressobj()
          with open(dest, 'wb') as f:
            while True:
              chunk = connection.read(ZIPPED_FILE_CHUNK)
              if not chunk:
                break
              size += len(chunk)
              f.write(decompressor.decompress(chunk))
          # Ensure that all the data was properly decompressed.
          uncompressed_data = decompressor.flush()
          assert not uncompressed_data
        except IOError:
          logging.error('Encountered an exception with (%s, %s)', item, dest)
          raise
        except httplib.HTTPException as e:
          raise IOError('Encountered an HTTPException.\n%s' % e)
        except zlib.error as e:
          # Log the first bytes of the chunk that failed to see if it's
          # uncompressed data.
          logging.warning('%r', chunk[:512])
          raise IOError(
              'Problem unzipping data for item %s. Got %d bytes.\n%s' %
              (item, size, e))

      return download_file

    def copy_file(item, dest):
      source = os.path.join(file_or_url, item)
      if source == dest:
        logging.info('Source and destination are the same, no action required')
        return
      logging.debug('copy_file(%s, %s)', source, dest)
      shutil.copy(source, dest)
    return copy_file
1049
1050
class CachePolicies(object):
  """Plain holder for the cache retention limits.

  Attributes:
  - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                    cache is effectively a leak.
  - min_free_space: Trim if disk free space becomes lower than this value. If
                    0, it unconditionally fill the disk.
  - max_items: Maximum number of items to keep in the cache. If 0, do not
               enforce a limit.
  """
  def __init__(self, max_cache_size, min_free_space, max_items):
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
1065
1066
class NoCache(object):
  """This class is intended to be usable everywhere the Cache class is.
  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    self.target_directory = target_directory
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Gets the requested file.

    Enqueues |item| on the remote then blocks until the remote reports one
    result.
    """
    self.remote.add_item(priority, item, self.path(item), size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Downloads the first item of |items| if it is missing and returns it.

    |items| can be any iterable, e.g. a dict keyed by item or a list of items.
    """
    item = next(iter(items))

    if not os.path.exists(self.path(item)):
      self.remote.add_item(Remote.MED, item, self.path(item), UNKNOWN_FILE_SIZE)
      downloaded = self.remote.get_one_result()
      assert downloaded == item

    return item

  def path(self, item):
    """Returns the path of |item| inside the target directory."""
    return os.path.join(self.target_directory, item)
1095
1096
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    self._state_need_to_be_saved = False
    # A lookup map to speed up searching.
    self._lookup = {}
    self._lookup_is_stale = True

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    with Profiler('Setup'):
      if not os.path.isdir(self.cache_dir):
        os.makedirs(self.cache_dir)
      if os.path.isfile(self.state_file):
        try:
          # Close the file deterministically instead of leaking the handle.
          with open(self.state_file, 'r') as f:
            self.state = json.load(f)
        except (IOError, ValueError) as e:
          # Too bad. The file will be overwritten and the cache cleared.
          logging.error(
              'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
          self._state_need_to_be_saved = True
        if (not isinstance(self.state, list) or
            not all(
              isinstance(i, (list, tuple)) and len(i) == 2
              for i in self.state)):
          # Discard.
          self._state_need_to_be_saved = True
          self.state = []

      # Ensure that all files listed in the state still exist and add new ones.
      previous = set(filename for filename, _ in self.state)
      if len(previous) != len(self.state):
        logging.warning('Cache state is corrupted, found duplicate files')
        self._state_need_to_be_saved = True
        # NOTE(review): |previous| is not cleared here, so files named in the
        # discarded state are skipped by the loop below and stay on disk
        # untracked for this run -- confirm this is intended.
        self.state = []

      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        if not RE_IS_SHA1.match(filename):
          logging.warning('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
          continue
        # Insert as the oldest file. It will be deleted eventually if not
        # accessed.
        self._add(filename, False)
        logging.warning('Add unknown file %s to cache', filename)
        added += 1

      if added:
        logging.warning('Added back %d unknown files', added)
      if previous:
        # What is left in |previous| was listed in the state but is not on
        # disk anymore.
        logging.warning('Removed %d lost files', len(previous))
        # Set explicitly in case self._add() wasn't called.
        self._state_need_to_be_saved = True
        # Filter out entries that were not found while keeping the previous
        # order.
        self.state = [
          (filename, size) for filename, size in self.state
          if filename not in previous
        ]
      self.trim()

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Trims the cache one last time and logs the usage statistics."""
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%5d (%8dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('       %8dkb free', self._free_disk / 1024)

  def remove_file_at_index(self, index):
    """Removes the file at the given index.

    The entry is dropped from the state before the file is deleted; an OSError
    while deleting is logged and swallowed.
    """
    try:
      self._state_need_to_be_saved = True
      filename, size = self.state.pop(index)
      # If the lookup was already stale, its possible the filename was not
      # present yet.
      self._lookup_is_stale = True
      self._lookup.pop(filename, None)
      self._removed.append(size)
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)

  def remove_lru_file(self):
    """Removes the least recently used file."""
    self.remove_file_at_index(0)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    # Ensure maximum cache size.
    if self.policies.max_cache_size and self.state:
      # Keep a running total instead of summing the whole state again after
      # each removal.
      total_size = sum(i[1] for i in self.state)
      while self.state and total_size > self.policies.max_cache_size:
        total_size -= self.state[0][1]
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)

    self.save()

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verify that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.
    """
    assert '/' not in item
    path = self.path(item)
    self._update_lookup()
    index = self._lookup.get(item)

    if index is not None:
      if not valid_file(self.path(item), size):
        self.remove_file_at_index(index)
        index = None
      else:
        assert index < len(self.state)
        # Was already in cache. Update it's LRU value by putting it at the end.
        self._state_need_to_be_saved = True
        self._lookup_is_stale = True
        self.state.append(self.state.pop(index))

    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.add_item(priority, item, path, size)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    self._update_lookup()
    if obj not in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering."""
    if self._state_need_to_be_saved:
      # Close explicitly so the state is flushed to disk before returning.
      with open(self.state_file, 'wb') as f:
        json.dump(self.state, f, separators=(',',':'))
      self._state_need_to_be_saved = False

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    self._update_lookup()
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_one_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    self._state_need_to_be_saved = True
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self._lookup_is_stale = True
      self.state.insert(0, (item, size))

  def _update_lookup(self):
    """Rebuilds the filename -> index map if it was flagged as stale."""
    if self._lookup_is_stale:
      self._lookup = dict(
          (filename, index) for index, (filename, _) in enumerate(self.state))
      self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001340
1341
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""
  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False
    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| as a .isolated file and stores the result.

    One child IsolatedFile is created per entry of the 'includes' key. Must be
    called at most once per instance.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    includes = self.data.get('includes', [])
    self.children = [IsolatedFile(included) for included in includes]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      # Either already done or nothing is known yet.
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s' % filepath)
      cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1396
1397
class Settings(object):
  """Results of a completely parsed .isolated file.

  The attributes hold their initial empty values until load() is called.
  """
  def __init__(self):
    # Command to run; taken from the first processed .isolated defining one.
    self.command = []
    # Maps each file path to its properties dict; filled in by fetch_files().
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Args:
      cache: object used to get the items; its retrieve(), wait_for() and
          path() methods are called.
      root_isolated_hash: hash of the root .isolated file.

    Raises:
      ConfigError: if a .isolated file is requested more than once, which
          covers every kind of recursive include.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Every hash requested so far, used to refuse recursive includes. Each new
    # child must be recorded here, otherwise only an include of the root itself
    # is detected and any other cycle is fetched forever.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node defining a property wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file deterministically instead of relying on the garbage
      # collector.
      with open(cache.path(item_hash), 'r') as f:
        content = f.read()
      item.load(content)
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    # Properties that no .isolated file defined fall back to these defaults.
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1479
1480
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files.

  Directories that already exist are left untouched, so this can safely be
  called on a |base_directory| that is already partially populated.

  Args:
    base_directory: directory under which the tree is created.
    files: sized iterable of relative file paths; iterating a dict yields its
        keys.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Creates the tree of directories to create, including every parent.
  directories = set()
  for f in files:
    item = os.path.dirname(f)
    # Once a directory is known all its parents are known too, so stop there.
    while item and item not in directories:
      directories.add(item)
      item = os.path.dirname(item)
  # A parent is a prefix of its children so sorting puts it before them.
  for d in sorted(directories):
    path = os.path.join(base_directory, d)
    if not os.path.isdir(path):
      os.mkdir(path)
1493
1494
def create_links(base_directory, files):
  """Creates the symlinks described by the given (path, properties) pairs.

  Only the entries with a 'l' property are symlinks; the others are ignored.
  On Windows the symlinks are not created and a warning is logged instead.

  Args:
    base_directory: directory the link paths are relative to.
    files: iterable of (relative path, properties dict) pairs.
  """
  on_windows = sys.platform == 'win32'
  symlinks = ((path, props) for path, props in files if 'l' in props)
  for path, props in symlinks:
    if on_windows:
      # TODO(maruel): Create junctions or empty text files similar to what
      # cygwin do?
      logging.warning('Ignoring symlink %s', path)
      continue
    link_path = os.path.join(base_directory, path)
    # symlink doesn't exist on Windows. So the 'link' property should
    # never be specified for windows .isolated file.
    os.symlink(props['l'], link_path)  # pylint: disable=E1101
    # lchmod is not available on every platform; skip the mode when missing.
    set_link_mode = getattr(os, 'lchmod', None) if 'm' in props else None
    if set_link_mode:
      set_link_mode(link_path, props['m'])
1513
1514
def setup_commands(base_directory, cwd, cmd):
  """Returns the (working directory, command) pair needed to run the test.

  |cwd| must be a relative path. It is resolved against |base_directory| and
  the resulting directory is created when it does not exist yet.

  Note that cmd[0] is modified in place by this function.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  work_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(work_dir):
    os.makedirs(work_dir)

  # Ensure paths are correctly separated on windows.
  cmd[0] = cmd[0].replace('/', os.path.sep)
  return work_dir, fix_python_path(cmd)
1529
1530
def generate_remaining_files(files):
  """Groups the files to download by content hash.

  Args:
    files: iterable of (file path, properties dict) pairs.

  Returns:
    A dict mapping each 'h' value to the list of (file path, properties) pairs
    having it, in iteration order. Entries without a 'h' property are left out.
  """
  by_hash = {}
  for path, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    entry = (path, props)
    if digest in by_hash:
      by_hash[digest].append(entry)
    else:
      by_hash[digest] = [entry]
  return by_hash
1539
1540
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies to the given directory.

  The test is not run; the command to use and its working directory are
  printed instead.

  Returns:
    0 on success, 1 when the .isolated file does not define a command.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds') as _prof:
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    sys.stderr.write('No command to run\n')
    return 1

  with Profiler('GetRest') as _prof:
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      obj = no_cache.wait_for(remaining)
      files = remaining.pop(obj)

      last_index = len(files) - 1
      for index, (filepath, properties) in enumerate(files):
        outfile = os.path.join(target_directory, filepath)
        source = no_cache.path(obj)
        logging.info(source)

        # The same content can be mapped at several paths: copy it for all
        # of them but the last one, which takes over the downloaded file.
        if index < last_index:
          shutil.copyfile(source, outfile)
        else:
          os.rename(source, outfile)

        if sys.platform != 'win32' and 'm' in properties:
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in that
          # case.
          os.chmod(outfile, properties['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...', len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1598
1599
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Args:
    isolated_hash: hash of the .isolated file to run. When it does not match
        RE_IS_SHA1 it is used as the path of a local file, which is hashed and
        added to the cache first.
    cache_dir: directory of the local cache; the temporary directory is
        created with make_temp_dir('run_tha_test', cache_dir).
    remote: passed to Remote() to get the items.
    policies: cache policies passed to Cache().

  Returns:
    1 when the .isolated file does not define a command, otherwise the exit
    code of the command.

  Raises:
    OSError: if the command could not be started; it is reported on stderr
        before being re-raised.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # The file is closed explicitly instead of leaking the handle.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        sys.stderr.write('No command to run\n')
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...' % len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        sys.stderr.write('Failed to run %s; cwd=%s\n' % (cmd, cwd))
        raise
    finally:
      # The temporary directory is removed whatever the outcome.
      rmtree(outdir)
1664
1665
def main():
  """Parses the command line, sets up logging then downloads or runs the test.

  Returns:
    The exit code of the process: the value returned by download_test_data()
    or run_tha_test(), or 1 if run_tha_test() raised an exception.
  """
  disable_buffering()
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  download_group = optparse.OptionGroup(parser, 'Download')
  download_group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(download_group)

  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  source_group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  source_group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  parser.add_option_group(source_group)

  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  cache_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  cache_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  cache_group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(cache_group)

  options, args = parser.parse_args()

  # The console verbosity follows the number of -v; it is capped at DEBUG.
  console_levels = (logging.ERROR, logging.INFO, logging.DEBUG)
  console_level = console_levels[min(len(console_levels) - 1, options.verbose)]
  root_logger = logging.getLogger()

  console_handler = logging.StreamHandler()
  console_handler.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  console_handler.setLevel(console_level)
  root_logger.addHandler(console_handler)

  # The rotating log file always gets everything, whatever the verbosity.
  file_handler = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  file_handler.setLevel(logging.DEBUG)
  file_handler.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  root_logger.addHandler(file_handler)

  root_logger.setLevel(logging.DEBUG)

  # Usage errors are logged too since parser.error() exits the process.
  if bool(options.isolated) == bool(options.hash):
    message = 'One and only one of --isolated or --hash is required.'
    logging.debug(message)
    parser.error(message)
  if args:
    message = 'Unsupported args %s' % ' '.join(args)
    logging.debug(message)
    parser.error(message)

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  isolated = options.isolated or options.hash
  if options.download:
    return download_test_data(isolated, options.download, options.remote)

  try:
    return run_tha_test(isolated, options.cache, options.remote, policies)
  except Exception as e:
    # Make sure any exception is logged.
    logging.exception(e)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001768
1769
if __name__ == '__main__':
  # The encoding must be fixed first so that everything else always runs with
  # the correct one.
  fix_default_encoding()
  exit_code = main()
  sys.exit(exit_code)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001773 sys.exit(main())