blob: 3224801e299b31b1d095ab82ed25b78f8e863494 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
13import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000014import httplib
maruel@chromium.orgedd25d02013-03-26 14:38:00 +000015import inspect
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000016import json
csharp@chromium.orgbfb98742013-03-26 20:28:36 +000017import locale
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000018import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000019import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000020import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000021import optparse
22import os
23import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000024import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000025import re
26import shutil
27import stat
28import subprocess
29import sys
30import tempfile
31import threading
32import time
maruel@chromium.org97cd0be2013-03-13 14:01:36 +000033import traceback
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000034import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000035import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000036import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000037import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000038
# The 'upload' module is used by AppEngineService for authentication. It is
# optional: when it cannot be imported, app engine authentication support is
# disabled and 'upload' is left as None.
try:
  from third_party import upload
except ImportError:
  upload = None
else:
  # Hack out upload logging.info(): give the module its own logger that only
  # lets warnings and above through.
  upload.logging = logging.getLogger('upload')
  # Mac pylint choke on this line.
  upload.logging.setLevel(logging.WARNING)  # pylint: disable=E1103
49
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000050
# Types of action accepted by link_file().
HARDLINK = 1
SYMLINK = 2
COPY = 3

# Matches a string made of exactly 40 hexadecimal characters, i.e. a
# hex-encoded sha-1 digest.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None

# The size of each chunk to read when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# The base directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# The name of the log to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# The name of the key to store the count of url attempts.
COUNT_KEY = 'UrlOpenAttempt'

# The maximum number of attempts to open a url before aborting.
MAX_URL_OPEN_ATTEMPTS = 30

# Global (for now) map: server URL (http://example.com) -> HttpService
# instance. Used by get_http_service to cache HttpService instances; guarded
# by _http_services_lock.
_http_services = {}
_http_services_lock = threading.Lock()
87
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000088
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be loaded."""
92
93
class MappingError(OSError):
  """Raised when the tree of files could not be recreated."""
97
98
def get_flavor():
  """Returns the system default flavor, derived from sys.platform.

  Any platform that is not explicitly listed below is reported as 'linux'.
  Copied from gyp/pylib/gyp/common.py.
  """
  platform = sys.platform
  if platform in ('cygwin', 'win32'):
    return 'win'
  if platform == 'darwin':
    return 'mac'
  if platform == 'sunos5':
    return 'solaris'
  if platform in ('freebsd7', 'freebsd8'):
    return 'freebsd'
  return 'linux'
110
111
def fix_default_encoding():
  """Forces utf8 solidly on all platforms.

  By default python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Returns False when the default encoding already is utf-8 and nothing was
  touched, True after the default encoding and the locale were updated.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Regenerate setdefaultencoding.
  reload(sys)
  # Module 'sys' has no 'setdefaultencoding' member
  # pylint: disable=E1101
  sys.setdefaultencoding('utf-8')

  # Visit every LC_* category exposed by the locale module and try to switch
  # it to the UTF-8 variant of the language it currently uses.
  for name in dir(locale):
    if not name.startswith('LC_'):
      continue
    category = getattr(locale, name)
    try:
      locale.setlocale(category, '')
    except locale.Error:
      continue
    try:
      language = locale.getlocale(category)[0]
    except (TypeError, ValueError):
      continue
    if not language:
      continue
    try:
      locale.setlocale(category, (language, 'UTF-8'))
    except locale.Error:
      # The locale could not be set directly; record it in the environment
      # variable of the same name instead.
      os.environ[name] = language + '.UTF-8'

  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    pass
  return True
150
151
class Unbuffered(object):
  """Wraps a file object and flushes it each time a newline is written.

  Every attribute other than write() and 'stream' is forwarded to the wrapped
  file object.
  """

  def __init__(self, stream):
    self.stream = stream

  def write(self, data):
    """Writes |data| to the stream, flushing if it contains a newline."""
    wrapped = self.stream
    wrapped.write(data)
    if '\n' not in data:
      return
    wrapped.flush()

  def __getattr__(self, attr):
    # Only reached for names not found on the wrapper itself.
    return getattr(self.stream, attr)
164
165
def disable_buffering():
  """Makes this process and child processes stdout unbuffered.

  Does nothing when the PYTHONUNBUFFERED environment variable is already set
  to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...
  sys.stdout = Unbuffered(sys.stdout)
  # Exported through the environment so child processes see it as well.
  os.environ['PYTHONUNBUFFERED'] = 'x'
173
174
def os_link(source, link_name):
  """Creates a hard link named |link_name| pointing to |source|.

  Adds support for os.link() on Windows, where CreateHardLinkW() is called
  through ctypes instead.

  Raises:
    OSError: the hard link could not be created. On Windows this is a
        WindowsError (an OSError subclass) built from the last Windows error,
        so the failure reason is no longer lost.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return

  created = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not created:
    # ctypes.WinError() without arguments uses GetLastError() for the code
    # and message. Callers catching OSError keep working.
    raise ctypes.WinError()
183
184
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and makes the copy readable by everyone."""
  shutil.copy(infile, outfile)
  everyone_can_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  current_mode = os.stat(outfile).st_mode
  os.chmod(outfile, current_mode | everyone_can_read)
191
192
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile|; the type of link depends on |action|.

  |action| is one of HARDLINK, SYMLINK or COPY.

  Raises:
    ValueError: |action| is not a known action.
    MappingError: |infile| is missing or |outfile| already exists.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    in_size = os.stat(infile).st_size
    out_size = os.stat(outfile).st_size
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, in_size, out_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # Either a hardlink was requested, or a symlink was requested on Windows,
  # where symlinks are converted to hardlinks. Both fail over to a copy.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warning(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
219
220
def _set_write_bit(path, read_only):
  """Sets or resets the write bit on a file or directory.

  Arguments:
  - path: file, directory or symlink to modify. It is never dereferenced
          when reading its mode (os.lstat() is used).
  - read_only: when true, the mode is masked down to the owner read and
               execute bits (0500); otherwise the owner write bit (0200) is
               added to the current mode.

  Symlinks are modified with os.lchmod() where available. Without it they are
  left alone, because os.chmod() would change the link target instead.
  """
  original_mode = os.lstat(path).st_mode
  if read_only:
    mode = original_mode & 0o500
  else:
    mode = original_mode | 0o200

  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  # The file type must be tested on the mode returned by lstat(): masking
  # with 0500 drops the type bits, which would make every symlink look like a
  # regular file when |read_only| is set.
  if stat.S_ISLNK(original_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s' % path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
238
239
def make_writable(root, read_only):
  """Sets or resets the write bit on everything found below |root|.

  |root| must be an absolute path. When |read_only| is true the entries are
  made read-only, otherwise they are made writable. |root| itself is not
  modified, only the files and directories found while walking it.
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    # Files first, then the sub directories of the directory being visited.
    for entry in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, entry), read_only)
249
250
def rmtree(root):
  """Wrapper around shutil.rmtree() to retry automatically on Windows.

  The tree is made writable first. On Windows the removal is attempted up to
  three times, sleeping 2, 4 then 6 seconds after each WindowsError; if every
  attempt fails the function returns without raising.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in (1, 2, 3):
    try:
      shutil.rmtree(root)
      return
    except WindowsError:  # pylint: disable=E0602
      delay = attempt * 2
      print >> sys.stderr, (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      time.sleep(delay)
266
267
def is_same_filesystem(path1, path2):
  """Returns True if both paths are on the same filesystem.

  This is required to enable the use of hardlinks. Both paths must be
  absolute.
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    drive_pattern = r'^[a-zA-Z]\:\\.*'
    assert re.match(drive_pattern, path1), path1
    assert re.match(drive_pattern, path2), path2
    if path1[0].lower() != path2[0].lower():
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
284
285
def get_free_space(path):
  """Returns the number of free bytes reported for |path|."""
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  # GetDiskFreeSpaceExW() stores the free byte count in its last argument.
  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
296
297
def make_temp_dir(prefix, root_dir):
  """Returns a temporary directory on the same file system as root_dir.

  The default temporary directory is used when it lives on the same file
  system as |root_dir|; otherwise the directory is created next to
  |root_dir|, i.e. inside its parent directory.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
304
305
def load_isolated(content):
  """Verifies the .isolated file is valid and returns the decoded json data.

  Arguments:
  - content: text of the .isolated file, expected to be a json dict.

  Accepted top level keys:
  - 'command': non-empty list of strings.
  - 'files': dict mapping a string to a dict whose keys are limited to 'l'
             (link, a string), 'h' (sha-1), 'm' and 's' (integers). 'h' and
             'l' are mutually exclusive.
  - 'includes': non-empty list of sha-1.
  - 'read_only': bool.
  - 'relative_cwd': string.
  - 'os': must be equal to get_flavor().

  Returns the dict decoded from |content|.

  Raises:
    ConfigError: |content| is not valid json or does not respect the format
        described above. Wrongly typed values are always reported as
        ConfigError, never as a TypeError.
  """
  def is_sha1(candidate):
    # The type is checked first: RE_IS_SHA1.match() raises TypeError on
    # anything that is not a string.
    return (
        isinstance(candidate, basestring) and
        bool(RE_IS_SHA1.match(candidate)))

  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey in ('m', 's'):
            # json decodes integers that do not fit in a native int as long,
            # so both integer types have to be accepted.
            if not isinstance(subsubvalue, (int, long)):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_sha1(subsubvalue):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_sha1(subvalue):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
383
384
def fix_python_path(cmd):
  """Returns the fixed command line to call the right python executable.

  A leading 'python' is replaced by sys.executable, and sys.executable is
  inserted in front of a leading '.py' script. |cmd| itself is not modified.
  """
  fixed = cmd[:]
  program = fixed[0]
  if program == 'python':
    fixed[0] = sys.executable
  elif program.endswith('.py'):
    fixed.insert(0, sys.executable)
  return fixed
393
394
def url_open(url, data=None, retry_404=False, content_type=None):
  """Attempts to open the given url multiple times.

  |url| is split into the server part (scheme and host), used to look up the
  HttpService instance, and the remainder, which is sent as the request.

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns a file-like object, where the response may be read from, or None
  if it was unable to connect.
  """
  parsed = list(urlparse.urlparse(url))
  scheme, netloc = parsed[0], parsed[1]
  server_url = '%s://%s' % (scheme, netloc)
  # Everything after the host: path, params, query and fragment.
  request_url = urlparse.urlunparse(['', ''] + parsed[2:])
  return get_http_service(server_url).request(
      request_url, data, retry_404, content_type)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000412
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000413
def get_http_service(url):
  """Returns the HttpService instance that can send requests to |url|.

  Instances are cached per base url in _http_services; a new
  AppEngineService is created and cached the first time a url is seen.
  """
  with _http_services_lock:
    cached = _http_services.get(url)
    if cached:
      return cached
    created = AppEngineService(url)
    _http_services[url] = created
    return created
424
425
class HttpService(object):
  """Base class for a class that provides an API to HTTP based service:
    - Provides 'request' method.
    - Supports automatic request retries.
    - Supports persistent cookies.
    - Thread safe.
  """

  # File to use to store all auth cookies.
  COOKIE_FILE = '~/.isolated_cookies'

  # CookieJar reused by all services + lock that protects its instantiation.
  _cookie_jar = None
  _cookie_jar_lock = threading.Lock()

  def __init__(self, url):
    # The base url is stored as a str without trailing '/'.
    self.url = str(url.rstrip('/'))
    jar = self.load_cookie_jar()
    self.opener = self.create_url_opener(jar)

  def authenticate(self):  # pylint: disable=R0201
    """Called when HTTP server asks client to authenticate.

    Can be implemented in subclasses. The base implementation does not
    authenticate and returns False.
    """
    return False

  @staticmethod
  def load_cookie_jar():
    """Returns global CookieJar object that stores cookies in the file.

    The jar is created and loaded on the first call only.
    """
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is None:
        jar = ThreadSafeCookieJar(os.path.expanduser(HttpService.COOKIE_FILE))
        jar.load()
        HttpService._cookie_jar = jar
      return HttpService._cookie_jar

  @staticmethod
  def save_cookie_jar():
    """Called when cookie jar needs to be flushed to disk."""
    with HttpService._cookie_jar_lock:
      jar = HttpService._cookie_jar
      if jar is not None:
        jar.save()

  def create_url_opener(self, cookie_jar):  # pylint: disable=R0201
    """Returns OpenerDirector that will be used when sending requests.

    Can be reimplemented in subclasses.
    """
    handlers = (
        urllib2.ProxyHandler(),
        urllib2.UnknownHandler(),
        urllib2.HTTPHandler(),
        urllib2.HTTPDefaultErrorHandler(),
        urllib2.HTTPSHandler(),
        urllib2.HTTPErrorProcessor(),
        urllib2.HTTPCookieProcessor(cookie_jar),
    )
    opener = urllib2.OpenerDirector()
    for handler in handlers:
      opener.add_handler(handler)
    return opener

  def request(self, url, data=None, retry_404=False, content_type=None):
    """Attempts to open the given url multiple times.

    |url| is relative to the server root, i.e. '/some/request?param=1'.

    |data| can be either:
      -None for a GET request
      -str for pre-encoded data
      -list for data to be encoded
      -dict for data to be encoded (COUNT_KEY will be added in this case)

    Returns a file-like object, where the response may be read from, or None
    if it was unable to connect.
    """
    assert url and url[0] == '/'

    if isinstance(data, dict) and COUNT_KEY in data:
      logging.error('%s already existed in the data passed into UlrOpen. It '
                    'would be overwritten. Aborting UrlOpen', COUNT_KEY)
      return None

    is_get = data is None
    assert not (is_get and content_type), 'Can\'t use content_type on GET'

    # When the body is absent or already encoded, the retry counter cannot be
    # merged into it and goes into the query string instead.
    extra_in_query = isinstance(data, str) or data is None

    def make_request(extra):
      """Returns a urllib2.Request instance for this specific retry."""
      if extra_in_query:
        payload = data
      else:
        fields = data.items() if isinstance(data, dict) else data[:]
        fields.extend(extra.iteritems())
        payload = urllib.urlencode(fields)

      new_url = urlparse.urljoin(self.url, url.lstrip('/'))
      if extra_in_query:
        # In these cases, add the extra parameter to the query part of the
        # url.
        parts = list(urlparse.urlparse(new_url))
        # Append the query parameter.
        if parts[4] and extra:
          parts[4] += '&'
        parts[4] += urllib.urlencode(extra)
        new_url = urlparse.urlunparse(parts)

      request = urllib2.Request(new_url, data=payload)
      if payload is not None:
        if content_type:
          request.add_header('Content-Type', content_type)
        request.add_header('Content-Length', len(payload))
      return request

    return self._retry_loop(make_request, retry_404)

  def _retry_loop(self, make_request, retry_404=False):
    """Runs internal request-retry loop.

    |make_request| is called before each attempt with the extra parameters to
    add to the request, and must return the urllib2.Request to send.

    Returns the response of the first successful attempt, or None when the
    request was rejected or every attempt failed.
    """
    authenticated = False
    last_error = None
    for attempt in range(MAX_URL_OPEN_ATTEMPTS):
      # The attempt number is only sent on retries, not on the first try.
      extra = {COUNT_KEY: attempt} if attempt else {}
      request = make_request(extra)
      try:
        url_response = self._url_open(request)
        logging.debug('url_open(%s) succeeded', request.get_full_url())
        return url_response
      except urllib2.HTTPError as e:
        # Unauthorized. Ask to authenticate and then try again.
        if e.code in (302, 401, 403):
          # Try to authenticate only once. If it doesn't help, then server does
          # not support app engine authentication.
          if not authenticated and self.authenticate():
            authenticated = True
            continue
          logging.error(
              'Unable to authenticate to %s.\n%s\n%s',
              request.get_full_url(), e, e.read())
          return None

        can_retry = e.code >= 500 or (retry_404 and e.code == 404)
        if not can_retry:
          # This HTTPError means we reached the server and there was a problem
          # with the request, so don't retry.
          logging.error(
              'Able to connect to %s but an exception was thrown.\n%s\n%s',
              request.get_full_url(), e, e.read())
          return None

        # The HTTPError was due to a server error, so retry the attempt.
        logging.warning('Able to connect to %s on attempt %d.\nException: %s ',
                        request.get_full_url(), attempt, e)
        last_error = e

      except (urllib2.URLError, httplib.HTTPException) as e:
        logging.warning('Unable to open url %s on attempt %d.\nException: %s',
                        request.get_full_url(), attempt, e)
        last_error = e

      # Only sleep if we are going to try again.
      if attempt != MAX_URL_OPEN_ATTEMPTS - 1:
        self._sleep_before_retry(attempt)

    logging.error('Unable to open given url, %s, after %d attempts.\n%s',
                  request.get_full_url(), MAX_URL_OPEN_ATTEMPTS, last_error)
    return None

  def _url_open(self, request):
    """Low level method to execute urllib2.Request's.

    To be mocked in tests.
    """
    return self.opener.open(request)

  def _sleep_before_retry(self, attempt):  # pylint: disable=R0201
    """Sleeps for some amount of time when retrying the request.

    The delay grows as 1.5 ** (attempt + 1) plus up to 3 seconds of random
    jitter, and is clamped between 0.1 and 20 seconds.

    To be mocked in tests.
    """
    jitter = random.random() * 3
    backoff = math.pow(1.5, (attempt + 1))
    duration = max(0.1, jitter + backoff)
    time.sleep(min(20, duration))
maruel@chromium.orgef333122013-03-12 20:36:40 +0000598
599
class AppEngineService(HttpService):
  """This class implements authentication support for
  an app engine based services.
  """

  # This lock ensures that user won't be confused with multiple concurrent
  # login prompts.
  _auth_lock = threading.Lock()

  def __init__(self, url, email=None, password=None):
    super(AppEngineService, self).__init__(url)
    self.email = email
    self.password = password
    # Created on first use by get_credentials().
    self._keyring = None

  def authenticate(self):
    """Authenticates in the app engine application.

    Returns True on success. Returns False without trying when the 'upload'
    module could not be imported.
    """
    if not upload:
      logging.warning('\'upload\' module is missing, '
                      'app engine authentication is disabled.')
      return False

    # Captured by the class below, where 'self' is the RPC server instead.
    opener = self.opener
    save_cookie_jar = self.save_cookie_jar

    # RPC server that uses AuthenticationSupport's cookie jar and url opener.
    class AuthServer(upload.AbstractRpcServer):
      def _GetOpener(self):
        return opener

      def PerformAuthentication(self):
        self._Authenticate()
        save_cookie_jar()
        return self.authenticated

    with AppEngineService._auth_lock:
      return AuthServer(self.url, self.get_credentials).PerformAuthentication()

  def get_credentials(self):
    """Called during authentication process to get the credentials.

    May be called multiple times if authentication fails.
    Returns tuple (email, password).
    """
    # 'authenticate' calls this only if 'upload' is present.
    # Ensure other callers (if any) fail non-cryptically if 'upload' is missing.
    assert upload, '\'upload\' module is required for this to work'
    if self.email and self.password:
      return (self.email, self.password)
    if not self._keyring:
      self._keyring = upload.KeyringCreds(
          self.url, self.url.lower(), self.email)
    return self._keyring.GetUserCredentials()
652
653
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save."""

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists, creates the file otherwise.

    |filename| defaults to the file name the jar was created with. A file
    that cannot be read or parsed is not fatal: the failure is logged and the
    jar keeps whatever it contained. In all cases the file mode is set to
    0600 afterwards.
    """
    filename = filename or self.filename
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(
              self, filename, ignore_discard, ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError) as e:
          # Best effort: a broken cookie file must not prevent requests, but
          # the failure is logged instead of being silently dropped.
          logging.warning('Failed to load cookies from %s: %s', filename, e)
      else:
        # Create an empty file that only the current user can access.
        fd = os.open(filename, os.O_CREAT, 0o600)
        os.close(fd)
      # NOTE(review): the flattened source does not show whether this chmod
      # belongs to the 'else' branch only; it is applied in both cases here.
      os.chmod(filename, 0o600)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it."""
    logging.debug('Saving cookies to %s', filename or self.filename)
    with self._cookies_lock:
      cookielib.MozillaCookieJar.save(
          self, filename, ignore_discard, ignore_expires)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000681
682
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000683class ThreadPool(object):
684 """Implements a multithreaded worker pool oriented for mapping jobs with
685 thread-local result storage.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000686
687 Arguments:
688 - initial_threads: Number of threads to start immediately. Can be 0 if it is
689 uncertain that threads will be needed.
690 - max_threads: Maximum number of threads that will be started when all the
691 threads are busy working. Often the number of CPU cores.
692 - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
693 queue. A non-zero value may make add_task() blocking.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000694 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000695 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000696
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000697 def __init__(self, initial_threads, max_threads, queue_size):
698 logging.debug(
699 'ThreadPool(%d, %d, %d)', initial_threads, max_threads, queue_size)
700 assert initial_threads <= max_threads
701 # Update this check once 256 cores CPU are common.
702 assert max_threads <= 256
703
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000704 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000705 self._max_threads = max_threads
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000706
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000707 # Mutables.
708 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000709 self._num_of_added_tasks = 0
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000710 self._outputs_exceptions_cond = threading.Condition()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000711 self._outputs = []
712 self._exceptions = []
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000713 # Number of threads in wait state.
714 self._ready_lock = threading.Lock()
715 self._ready = 0
716 self._workers_lock = threading.Lock()
717 self._workers = []
718 for _ in range(initial_threads):
719 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000720
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000721 def _add_worker(self):
722 """Adds one worker thread if there isn't too many. Thread-safe."""
723 # Better to take the lock two times than hold it for too long.
724 with self._workers_lock:
725 if len(self._workers) >= self._max_threads:
726 return False
727 worker = threading.Thread(target=self._run)
728 with self._workers_lock:
729 if len(self._workers) >= self._max_threads:
730 return False
731 self._workers.append(worker)
732 worker.daemon = True
733 worker.start()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000734
maruel@chromium.org831958f2013-01-22 15:01:46 +0000735 def add_task(self, priority, func, *args, **kwargs):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000736 """Adds a task, a function to be executed by a worker.
737
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000738 |priority| can adjust the priority of the task versus others. Lower priority
maruel@chromium.org831958f2013-01-22 15:01:46 +0000739 takes precedence.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000740
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000741 |func| can either return a return value to be added to the output list or
742 be a generator which can emit multiple values.
743
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000744 Returns the index of the item added, e.g. the total number of enqueued items
745 up to now.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000746 """
maruel@chromium.org831958f2013-01-22 15:01:46 +0000747 assert isinstance(priority, int)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000748 assert callable(func)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000749 with self._ready_lock:
750 start_new_worker = not self._ready
751 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000752 self._num_of_added_tasks += 1
753 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000754 self.tasks.put((priority, index, func, args, kwargs))
755 if start_new_worker:
756 self._add_worker()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000757 return index
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000758
def _run(self):
  """Worker thread main loop; exits once a None task is dequeued.

  Each task is a (priority, index, func, args, kwargs) tuple. |func| is called
  with |args| and |kwargs|; every value it returns (or yields, when it is a
  generator function) is handed to self._output_append(). Any exception is
  logged and its sys.exc_info() is stored in self._exceptions. task_done() is
  always called on the queue, including for the None sentinel.
  """
  while True:
    # Count this thread as idle for as long as it is blocked on the queue.
    try:
      with self._ready_lock:
        self._ready += 1
      job = self.tasks.get()
    finally:
      with self._ready_lock:
        self._ready -= 1

    try:
      if job is None:
        # Sentinel: time to stop this worker.
        return
      _priority, _index, func, args, kwargs = job
      results = func(*args, **kwargs)
      if not inspect.isgeneratorfunction(func):
        # A plain function produced a single value.
        results = (results,)
      for result in results:
        self._output_append(result)
    except Exception as e:
      logging.warning('Caught exception: %s', e)
      failure = sys.exc_info()
      logging.info(''.join(traceback.format_tb(failure[2])))
      with self._outputs_exceptions_cond:
        self._exceptions.append(failure)
        self._outputs_exceptions_cond.notifyAll()
    finally:
      try:
        self.tasks.task_done()
      except Exception as e:
        # This is the root function of the thread; nothing above it would
        # catch and report the error.
        logging.exception('Caught exception while marking task as done: %s',
                          e)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000798
def _output_append(self, out):
  """Stores |out| in self._outputs and notifies all waiters on the condition.

  A None value is ignored: nothing is stored and nobody is notified.
  """
  if out is None:
    return
  with self._outputs_exceptions_cond:
    self._outputs.append(out)
    self._outputs_exceptions_cond.notifyAll()
807
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000808 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000809 """Extracts all the results from each threads unordered.
810
811 Call repeatedly to extract all the exceptions if desired.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000812
813 Note: will wait for all work items to be done before returning an exception.
814 To get an exception early, use get_one_result().
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000815 """
816 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000817 self.tasks.join()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000818 self._outputs_exceptions_cond.acquire()
819 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000820 if self._exceptions:
821 e = self._exceptions.pop(0)
822 raise e[0], e[1], e[2]
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000823 out = self._outputs
824 self._outputs = []
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000825 finally:
826 self._outputs_exceptions_cond.release()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000827 return out
828
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000829 def get_one_result(self):
830 """Returns the next item that was generated or raises an exception if one
831 occured.
832
833 Warning: this function will hang if there is no work item left. Use join
834 instead.
835 """
836 self._outputs_exceptions_cond.acquire()
837 try:
838 while True:
839 if self._exceptions:
840 e = self._exceptions.pop(0)
841 raise e[0], e[1], e[2]
842 if self._outputs:
843 return self._outputs.pop(0)
844 self._outputs_exceptions_cond.wait()
845 finally:
846 self._outputs_exceptions_cond.release()
847
def close(self):
  """Stops all the worker threads and waits for them to exit."""
  # One None sentinel per worker; a worker exits when it dequeues None.
  worker_count = len(self._workers)
  for _ in range(worker_count):
    self.tasks.put(None)
  for worker in self._workers:
    worker.join()
855
def __enter__(self):
  """Enables the 'with' statement; the pool itself is the bound value."""
  return self
859
def __exit__(self, _exc_type, _exc_value, _traceback):
  """Enables the 'with' statement; closes the pool when the block is left.

  The exception arguments are ignored and nothing is returned, so an exception
  raised inside the 'with' block keeps propagating.
  """
  self.close()
863
864
def valid_file(filepath, size):
  """Tells whether the file at |filepath| appears valid.

  Currently the only check is that the size on disk equals |size|. When |size|
  is UNKNOWN_FILE_SIZE the file is not inspected and True is returned. A
  mismatch is logged as a warning.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  size_on_disk = os.stat(filepath).st_size
  if size_on_disk == size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), size_on_disk, size)
  return False
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000877
878
class Profiler(object):
  """Context manager that logs how long its 'with' block took to run."""

  def __init__(self, name):
    # |name| only appears in the log line; start_time is set on __enter__.
    self.name = name
    self.start_time = None

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
892
893
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priorities of items are equal, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The low 8 bits are left free for the internal retry counter,
  # see _task_executer().
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    # Function to fetch a remote object or upload to a remote location.
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = ThreadPool(self.INITIAL_WORKERS, self.MAX_WORKERS, 0)

  def join(self):
    """Blocks until the queue is empty and returns what the pool's join()
    returns."""
    return self._pool.join()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first. |priority| must not use the low
    bits reserved by INTERNAL_PRIORITY_BITS.

    Thread-safe.
    """
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Enqueues the task without checking the reserved priority bits, so that
    retries can bump the priority by one."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns the next result from the underlying pool."""
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success. On IOError, the item is enqueued again with its
    priority value increased by one and None is returned, until the retry
    counter stored in the low priority bits reaches RETRIES; then the IOError
    is re-raised.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Retry a few times, lowering the priority.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a callable(item, dest) that retrieves |item| into |dest|.

    For an http(s) |file_or_url|, the callable downloads and zlib-decompresses
    file_or_url + item; otherwise it copies the file from that directory.
    """
    if re.match(r'^https?://.+$', file_or_url):
      def download_file(item, dest):
        # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
        # easy.
        zipped_source = file_or_url + item
        # Number of compressed bytes read so far, and the start of the
        # payload; both are only used for diagnostics on zlib failure.
        size = 0
        first_bytes = None
        try:
          logging.debug('download_file(%s)', zipped_source)

          # Because the app engine DB is only eventually consistent, retry
          # 404 errors because the file might just not be visible yet (even
          # though it has been uploaded).
          connection = url_open(zipped_source, retry_404=True)
          if not connection:
            raise IOError('Unable to open connection to %s' % zipped_source)
          decompressor = zlib.decompressobj()
          with open(dest, 'wb') as f:
            while True:
              chunk = connection.read(ZIPPED_FILE_CHUNK)
              if not chunk:
                break
              if first_bytes is None:
                first_bytes = chunk[:512]
              size += len(chunk)
              f.write(decompressor.decompress(chunk))
          # Ensure that all the data was properly decompressed.
          uncompressed_data = decompressor.flush()
          assert not uncompressed_data
        except IOError:
          logging.error('Encountered an exception with (%s, %s)', item, dest)
          raise
        except httplib.HTTPException as e:
          raise IOError('Encountered an HTTPException.\n%s' % e)
        except zlib.error as e:
          # Log the first bytes received to see if it's uncompressed data.
          logging.warning('%r', first_bytes)
          raise IOError(
              'Problem unzipping data for item %s. Got %d bytes.\n%s' %
              (item, size, e))

      return download_file

    def copy_file(item, dest):
      source = os.path.join(file_or_url, item)
      if source == dest:
        logging.info('Source and destination are the same, no action required')
        return
      logging.debug('copy_file(%s, %s)', source, dest)
      shutil.copy(source, dest)
    return copy_file
1013
1014
class CachePolicies(object):
  """Plain holder for the limits used when trimming the cache."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache size is not limited, e.g. the cache is effectively
                      a leak.
    - min_free_space: Trim if disk free space becomes lower than this value.
                      If 0, the disk is filled unconditionally.
    - max_items: Maximum number of items to keep in the cache. If 0, no limit
                 is enforced.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
1029
1030
class NoCache(object):
  """Drop-in stand-in for the Cache class that does not cache anything.

  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    self.target_directory = target_directory
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Requests |item| from the remote and waits for one result."""
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Downloads the first item of |items| if it is missing and returns it."""
    item = next(items.iterkeys())
    destination = self.path(item)
    if os.path.exists(destination):
      return item

    self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
    downloaded = self.remote.get_one_result()
    assert downloaded == item
    return item

  def path(self, item):
    """Returns where |item| lives inside the target directory."""
    return os.path.join(self.target_directory, item)
1059
1060
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    self._state_need_to_be_saved = False
    # A lookup map to speed up searching.
    self._lookup = {}
    self._lookup_is_stale = True

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    with Profiler('Setup'):
      if not os.path.isdir(self.cache_dir):
        os.makedirs(self.cache_dir)
      if os.path.isfile(self.state_file):
        try:
          with open(self.state_file, 'r') as f:
            self.state = json.load(f)
        except (IOError, ValueError) as e:
          # Too bad. The file will be overwritten and the cache cleared.
          logging.error(
              'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
          self._state_need_to_be_saved = True
        if (not isinstance(self.state, list) or
            not all(
              isinstance(i, (list, tuple)) and len(i) == 2
              for i in self.state)):
          # Discard.
          self._state_need_to_be_saved = True
          self.state = []

      # Ensure that all files listed in the state still exist and add new ones.
      previous = set(filename for filename, _ in self.state)
      if len(previous) != len(self.state):
        logging.warning('Cache state is corrupted, found duplicate files')
        self._state_need_to_be_saved = True
        self.state = []
        # The state was thrown away, so nothing is tracked anymore. Forget the
        # previous names too; otherwise the files on disk would be skipped
        # below and stay in the cache directory without being tracked.
        previous = set()

      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        if not RE_IS_SHA1.match(filename):
          logging.warning('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
          continue
        # Insert as the oldest file. It will be deleted eventually if not
        # accessed.
        self._add(filename, False)
        logging.warning('Add unknown file %s to cache', filename)
        added += 1

      if added:
        logging.warning('Added back %d unknown files', added)
      if previous:
        # What is left in |previous| is listed in the state but not on disk.
        logging.warning('Removed %d lost files', len(previous))
        # Set explicitly in case self._add() wasn't called.
        self._state_need_to_be_saved = True
        # Filter out entries that were not found while keeping the previous
        # order.
        self.state = [
          (filename, size) for filename, size in self.state
          if filename not in previous
        ]
      self.trim()

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%5d (%8dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('       %8dkb free', self._free_disk / 1024)

  def remove_file_at_index(self, index):
    """Removes the file at the given index.

    The entry is dropped from the state first; a failure to delete the file
    itself (OSError) is only logged.
    """
    try:
      self._state_need_to_be_saved = True
      filename, size = self.state.pop(index)
      # If the lookup was already stale, its possible the filename was not
      # present yet.
      self._lookup_is_stale = True
      self._lookup.pop(filename, None)
      self._removed.append(size)
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)

  def remove_lru_file(self):
    """Removes the least recently used file."""
    self.remove_file_at_index(0)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists.

    Applies, in order, the maximum cache size, the maximum number of items and
    the minimum free disk space policies, then saves the state.
    """
    # Ensure maximum cache size.
    if self.policies.max_cache_size and self.state:
      while sum(i[1] for i in self.state) > self.policies.max_cache_size:
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)

    self.save()

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verify that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.
    """
    assert not '/' in item
    path = self.path(item)
    self._update_lookup()
    index = self._lookup.get(item)

    if index is not None:
      if not valid_file(self.path(item), size):
        self.remove_file_at_index(index)
        index = None
      else:
        assert index < len(self.state)
        # Was already in cache. Update its LRU value by putting it at the end.
        self._state_need_to_be_saved = True
        self._lookup_is_stale = True
        self.state.append(self.state.pop(index))

    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.add_item(priority, item, path, size)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    self._update_lookup()
    if not obj in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering, if it changed since the last save."""
    if self._state_need_to_be_saved:
      # Text mode, matching how the file is read back; the handle is closed
      # right away so the content is on disk when this returns.
      with open(self.state_file, 'w') as f:
        json.dump(self.state, f, separators=(',',':'))
      self._state_need_to_be_saved = False

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    self._update_lookup()
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_one_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    self._state_need_to_be_saved = True
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self._lookup_is_stale = True
      self.state.insert(0, (item, size))

  def _update_lookup(self):
    """Rebuilds the filename -> index map from self.state when it is stale."""
    if self._lookup_is_stale:
      self._lookup = dict(
          (filename, index) for index, (filename, _) in enumerate(self.state))
      self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001304
1305
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False
    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.

    One child IsolatedFile is created per entry of the 'includes' key.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    includes = self.data.get('includes', [])
    self.children = [IsolatedFile(include) for include in includes]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s' % filepath)
      cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1360
1361
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Raises ConfigError when an .isolated file is included more than once.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Keeps the hashes of all the requested items to refuse recursive includes.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node defining a property wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        # Remember it, otherwise only an include of the root itself would
        # ever be detected.
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)
    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1443
1444
def create_directories(base_directory, files):
  """Creates, under base_directory, every directory the given files live in.

  Arguments:
    base_directory: existing directory in which the tree is created.
    files: sized collection of relative file paths; iterating a dict walks its
           keys, which is how the callers in this file use it.

  Each directory is created with os.mkdir(), so OSError is raised if one of
  them already exists.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect the parent directory of each file along with all its ancestors.
  needed = set()
  for filepath in files:
    parent = os.path.dirname(filepath)
    while parent:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # Sorting guarantees a directory is always created before its children.
  for relpath in sorted(needed):
    os.mkdir(os.path.join(base_directory, relpath))
1457
1458
def create_links(base_directory, files):
  """Creates the symlinks described by the given files.

  Arguments:
    base_directory: directory the link paths are relative to.
    files: iterable of (filepath, properties) pairs. Only entries whose
           properties contain 'l' (the link target) are processed; an optional
           'm' entry is the mode to apply to the link itself.
  """
  on_windows = sys.platform == 'win32'
  # Not every platform provides os.lchmod; the mode is skipped when missing.
  lchmod = getattr(os, 'lchmod', None)
  for relpath, props in files:
    if 'l' not in props:
      continue
    if on_windows:
      # TODO(maruel): Create junctions or empty text files similar to what
      # cygwin do?
      logging.warning('Ignoring symlink %s', relpath)
      continue
    link_path = os.path.join(base_directory, relpath)
    # symlink doesn't exist on Windows. So the 'link' property should
    # never be specified for windows .isolated file.
    os.symlink(props['l'], link_path)  # pylint: disable=E1101
    if 'm' in props and lchmod:
      lchmod(link_path, props['m'])
1477
1478
def setup_commands(base_directory, cwd, cmd):
  """Prepares the working directory and the command line to run the test.

  Arguments:
    base_directory: root directory of the mapped tree.
    cwd: working directory, relative to base_directory.
    cmd: command as a list of arguments; its first item is rewritten in place.

  Returns:
    Tuple (absolute working directory, adjusted command). The working
    directory is created if it doesn't exist yet.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  work_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(work_dir):
    os.makedirs(work_dir)

  # Ensure paths are correctly separated on windows.
  cmd[0] = cmd[0].replace('/', os.path.sep)
  return work_dir, fix_python_path(cmd)
1493
1494
def generate_remaining_files(files):
  """Groups the files that still need to be fetched by content hash.

  Arguments:
    files: iterable of (filepath, properties) pairs.

  Returns:
    Dict mapping each 'h' property value to the list of (filepath, properties)
    pairs sharing it, in iteration order. Entries without an 'h' property are
    left out.
  """
  by_hash = {}
  for path, meta in files:
    if 'h' not in meta:
      continue
    digest = meta['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((path, meta))
  return by_hash
1503
1504
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies to the given directory without running them.

  Arguments:
    isolated_hash: identifier of the .isolated file handed to Settings.load().
    target_directory: destination directory; created if it doesn't exist.
    remote: value handed to Remote() to fetch the items from.

  Returns:
    0 on success, 1 if the loaded settings contain no command.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds') as _prof:
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    print >> sys.stderr, 'No command to run'
    return 1

  with Profiler('GetRest') as _prof:
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      digest = no_cache.wait_for(remaining)
      destinations = remaining.pop(digest)
      total = len(destinations)

      for index, (filepath, properties) in enumerate(destinations):
        outfile = os.path.join(target_directory, filepath)
        logging.info(no_cache.path(digest))

        # The fetched item is copied to every destination but the last one,
        # where it is moved instead.
        is_last = (index + 1 == total)
        if not is_last:
          shutil.copyfile(no_cache.path(digest), outfile)
        else:
          os.rename(no_cache.path(digest), outfile)

        if 'm' in properties and sys.platform != 'win32':
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in that
          # case.
          os.chmod(outfile, properties['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...' % len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1562
1563
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
    isolated_hash: either a value matching RE_IS_SHA1 or the path of a local
                   file, which is then hashed and added to the cache.
    cache_dir: directory of the local cache; the temporary directory is
               created through make_temp_dir() with it as argument.
    remote: value handed to Remote() to fetch the items from.
    policies: cache policies handed to Cache().

  Returns:
    The exit code of the command, or 1 if the loaded settings contain no
    command.

  Raises:
    OSError if the command could not be started.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # Close the file deterministically instead of relying on garbage
          # collection to release the handle.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...', len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s', cmd, cwd)

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      # The temporary tree is always deleted, even when the run failed.
      rmtree(outdir)
1628
1629
def main():
  """Parses the command line, sets up logging, then downloads or runs a test.

  Exactly one of --isolated or --hash must be given and no positional argument
  is accepted; otherwise parser.error() is called, which exits the process.

  Returns:
    The process exit code: the value returned by download_test_data() when
    --download is used, otherwise the value returned by run_tha_test(), or 1 if
    run_tha_test() raised an exception.
  """
  disable_buffering()
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  group = optparse.OptionGroup(parser, 'Download')
  group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  # TODO(maruel): Remove once not used anymore.
  group.add_option(
      '-m', '--manifest', dest='isolated', help=optparse.SUPPRESS_HELP)
  group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  # Registered before the group is attached to the parser, like every other
  # option; it still belongs to the 'Data source' group.
  group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(group)

  options, args = parser.parse_args()
  # -v maps to INFO, -vv and above to DEBUG; this only affects the console.
  level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]

  logging_console = logging.StreamHandler()
  logging_console.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  logging_console.setLevel(level)
  logging.getLogger().addHandler(logging_console)

  # The rotating log file always records everything, whatever the verbosity.
  logging_rotating_file = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  logging_rotating_file.setLevel(logging.DEBUG)
  logging_rotating_file.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  logging.getLogger().addHandler(logging_rotating_file)

  # The root logger lets everything through; each handler filters on its own.
  logging.getLogger().setLevel(logging.DEBUG)

  # Usage errors are also logged so that they end up in the log file.
  if bool(options.isolated) == bool(options.hash):
    logging.debug('One and only one of --isolated or --hash is required.')
    parser.error('One and only one of --isolated or --hash is required.')
  if args:
    logging.debug('Unsupported args %s', ' '.join(args))
    parser.error('Unsupported args %s' % ' '.join(args))

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  if options.download:
    # NOTE(review): exceptions raised on this path are not caught below.
    return download_test_data(options.isolated or options.hash,
                              options.download, options.remote)
  else:
    try:
      return run_tha_test(
          options.isolated or options.hash,
          options.cache,
          options.remote,
          policies)
    except Exception as e:
      # Make sure any exception is logged.
      logging.exception(e)
      return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001735
1736
if __name__ == '__main__':
  # Ensure that we are always running with the correct encoding.
  fix_default_encoding()
  # The value returned by main() becomes the exit code of the process.
  exit_code = main()
  sys.exit(exit_code)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001740 sys.exit(main())