blob: e25546391fa511d385bd70ff9d3ef1e910f5e4e5 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
13import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000014import httplib
maruel@chromium.orgedd25d02013-03-26 14:38:00 +000015import inspect
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000016import json
csharp@chromium.orgbfb98742013-03-26 20:28:36 +000017import locale
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000018import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000019import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000020import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000021import optparse
22import os
23import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000024import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000025import re
26import shutil
27import stat
28import subprocess
29import sys
30import tempfile
31import threading
32import time
maruel@chromium.org97cd0be2013-03-13 14:01:36 +000033import traceback
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000034import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000035import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000036import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000037import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000038
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000039# Try to import 'upload' module used by AppEngineService for authentication.
40# If it is not there, app engine authentication support will be disabled.
41try:
42 from third_party import upload
43 # Hack out upload logging.info()
44 upload.logging = logging.getLogger('upload')
45 # Mac pylint choke on this line.
46 upload.logging.setLevel(logging.WARNING) # pylint: disable=E1103
47except ImportError:
48 upload = None
49
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000050
# Types of action accepted by link_file().
HARDLINK, SYMLINK, COPY = 1, 2, 3

# Matches a string made of exactly 40 hexadecimal characters, i.e. a sha-1
# digest in hex form.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# The file size to be used when we don't know the correct file size,
# generally used for .isolated files.
UNKNOWN_FILE_SIZE = None

# The size of each chunk to read when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# The base directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# The name of the log to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# The delay (in seconds) to wait between logging statements when retrieving
# the required files. This is intended to let the user (or buildbot) know that
# the program is still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# The name of the key to store the count of url attempts.
COUNT_KEY = 'UrlOpenAttempt'

# The maximum number of attempts to open a url before aborting.
MAX_URL_OPEN_ATTEMPTS = 30

# Global (for now) map: server URL (http://example.com) -> HttpService instance.
# Used by get_http_service to cache HttpService instances.
_http_services = {}
_http_services_lock = threading.Lock()
87
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000088
class ConfigError(ValueError):
  """Signals a generic failure to load a .isolated file."""
92
93
class MappingError(OSError):
  """Signals that the tree could not be recreated."""
97
98
def get_flavor():
  """Returns the flavor name matching sys.platform.

  Copied from gyp/pylib/gyp/common.py. Any platform that is not explicitly
  listed below is reported as 'linux'.
  """
  platform = sys.platform
  if platform in ('cygwin', 'win32'):
    return 'win'
  if platform == 'darwin':
    return 'mac'
  if platform == 'sunos5':
    return 'solaris'
  if platform in ('freebsd7', 'freebsd8'):
    return 'freebsd'
  return 'linux'
110
111
def fix_default_encoding():
  """Forces utf-8 as the default encoding and on every locale category.

  By default the python execution environment is lazy and defaults to ascii
  encoding.

  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/

  Returns False when the default encoding was already utf-8 (nothing is
  touched in that case), True once the encoding and locales were updated.
  """
  if sys.getdefaultencoding() == 'utf-8':
    return False

  # Reloading 'sys' regenerates setdefaultencoding(), which is otherwise not
  # available on the module.
  reload(sys)
  sys.setdefaultencoding('utf-8')  # pylint: disable=E1101

  category_names = [name for name in dir(locale) if name[0:3] == 'LC_']
  for name in category_names:
    category = getattr(locale, name)
    try:
      locale.setlocale(category, '')
    except locale.Error:
      continue
    try:
      language = locale.getlocale(category)[0]
    except (TypeError, ValueError):
      continue
    if not language:
      continue
    try:
      locale.setlocale(category, (language, 'UTF-8'))
    except locale.Error:
      # Fall back on the environment when the locale cannot be set directly.
      os.environ[name] = language + '.UTF-8'

  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error:
    pass
  return True
150
151
class Unbuffered(object):
  """Wraps a file object and flushes it each time a newline is written.

  Every attribute other than write() is forwarded to the wrapped stream.
  """
  def __init__(self, stream):
    self.stream = stream

  def write(self, data):
    """Writes |data| to the stream, flushing if it contains a newline."""
    self.stream.write(data)
    if '\n' not in data:
      return
    self.stream.flush()

  def __getattr__(self, attr):
    # Only called for attributes not found on this wrapper.
    return getattr(self.stream, attr)
164
165
def disable_buffering():
  """Makes this process and child processes stdout unbuffered.

  Does nothing when PYTHONUNBUFFERED is already set to a non-empty value.
  """
  if os.environ.get('PYTHONUNBUFFERED'):
    return
  # Since sys.stdout is a C++ object, it's impossible to do
  # sys.stdout.write = lambda...
  sys.stdout = Unbuffered(sys.stdout)
  os.environ['PYTHONUNBUFFERED'] = 'x'
173
174
def os_link(source, link_name):
  """Creates a hardlink |link_name| to |source|, including on Windows.

  Raises OSError if the link cannot be created.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  # Windows: call CreateHardLinkW() directly through ctypes.
  created = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not created:
    raise OSError()
183
184
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and makes the copy readable by everyone."""
  shutil.copy(infile, outfile)
  everyone_can_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  os.chmod(outfile, os.stat(outfile).st_mode | everyone_can_read)
191
192
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile|; |action| selects the kind of link.

  |action| must be one of HARDLINK, SYMLINK or COPY, otherwise ValueError is
  raised. MappingError is raised when |infile| is not a file or when |outfile|
  already exists as a file.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    insize = os.stat(infile).st_size
    outsize = os.stat(outfile).st_size
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' % (outfile, insize, outsize))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # HARDLINK, or SYMLINK on Windows where symlinks are converted to hardlinks.
  # Either way, fall back on a plain copy if hardlinking fails.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warn(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
219
220
221def _set_write_bit(path, read_only):
222 """Sets or resets the executable bit on a file or directory."""
223 mode = os.lstat(path).st_mode
224 if read_only:
225 mode = mode & 0500
226 else:
227 mode = mode | 0200
228 if hasattr(os, 'lchmod'):
229 os.lchmod(path, mode) # pylint: disable=E1101
230 else:
231 if stat.S_ISLNK(mode):
232 # Skip symlink without lchmod() support.
233 logging.debug('Can\'t change +w bit on symlink %s' % path)
234 return
235
236 # TODO(maruel): Implement proper DACL modification on Windows.
237 os.chmod(path, mode)
238
239
def make_writable(root, read_only):
  """Toggles the writable bit on everything found below |root|.

  |root| must be an absolute path. Its files and sub-directories are updated
  recursively; |root| itself is not modified.
  """
  assert os.path.isabs(root), root
  for parent, subdirs, files in os.walk(root, topdown=True):
    # Files first, then directories, for each directory visited.
    for name in files + subdirs:
      _set_write_bit(os.path.join(parent, name), read_only)
249
250
def rmtree(root):
  """Deletes a directory tree, retrying automatically on Windows.

  The tree is made writable first. On Windows, up to three attempts are made,
  sleeping 2, 4 then 6 seconds after each failure.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in range(3):
    try:
      shutil.rmtree(root)
      return
    except WindowsError:  # pylint: disable=E0602
      delay = (attempt + 1) * 2
      sys.stderr.write(
          'The test has subprocess outliving it. Sleep %d seconds.' % delay +
          '\n')
      time.sleep(delay)
266
267
def is_same_filesystem(path1, path2):
  """Returns True if both paths are on the same filesystem.

  This is required to enable the use of hardlinks. Both paths must be absolute.
  """
  for path in (path1, path2):
    assert os.path.isabs(path), path
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    drive1 = path1[0].lower()
    drive2 = path2[0].lower()
    if drive1 != drive2:
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
284
285
def get_free_space(path):
  """Returns the number of free bytes on the filesystem holding |path|."""
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  # GetDiskFreeSpaceExW() stores its result in the variable passed by pointer.
  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
296
297
def make_temp_dir(prefix, root_dir):
  """Returns a temporary directory on the same file system as root_dir.

  The default temporary directory is used when it is on the same file system
  as |root_dir|; otherwise the directory is created next to |root_dir|.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent = None
  else:
    parent = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent)
304
305
def _check_isolated_sha1(value):
  """Raises ConfigError unless |value| is a string holding a sha-1 hex digest."""
  # The type is checked before matching because RE_IS_SHA1.match() raises
  # TypeError on non-string input (e.g. a JSON number or null), while callers
  # are only told to expect ConfigError.
  if not isinstance(value, basestring) or not RE_IS_SHA1.match(value):
    raise ConfigError('Expected sha-1, got %r' % value)


def _check_isolated_string_list(values, empty_message):
  """Raises ConfigError unless |values| is a non-empty list."""
  if not isinstance(values, list):
    raise ConfigError('Expected list, got %r' % values)
  if not values:
    raise ConfigError(empty_message)


def _check_isolated_files(files):
  """Validates the 'files' section: a dict of path -> dict of properties."""
  if not isinstance(files, dict):
    raise ConfigError('Expected dict, got %r' % files)
  for path, properties in files.iteritems():
    if not isinstance(path, basestring):
      raise ConfigError('Expected string, got %r' % path)
    if not isinstance(properties, dict):
      raise ConfigError('Expected dict, got %r' % properties)
    for name, prop in properties.iteritems():
      if name == 'l':
        if not isinstance(prop, basestring):
          raise ConfigError('Expected string, got %r' % prop)
      elif name in ('m', 's'):
        if not isinstance(prop, int):
          raise ConfigError('Expected int, got %r' % prop)
      elif name == 'h':
        _check_isolated_sha1(prop)
      else:
        raise ConfigError('Unknown subsubkey %s' % name)
    if 'h' in properties and 'l' in properties:
      raise ConfigError(
          'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
          properties)


def load_isolated(content):
  """Parses the content of a .isolated file and verifies it is valid.

  Arguments:
  - content: the .isolated file content, as a json encoded string.

  Returns the decoded dict. Accepted top level keys are 'command', 'files',
  'includes', 'os', 'read_only' and 'relative_cwd'.

  Raises ConfigError if |content| is not valid json, if a key is unknown, if a
  value has an unexpected type or format, or if 'os' does not match
  get_flavor().
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      _check_isolated_string_list(value, 'Expected non-empty command')
      for arg in value:
        if not isinstance(arg, basestring):
          raise ConfigError('Expected string, got %r' % arg)

    elif key == 'files':
      _check_isolated_files(value)

    elif key == 'includes':
      _check_isolated_string_list(value, 'Expected non-empty includes list')
      for include in value:
        _check_isolated_sha1(include)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      flavor = get_flavor()
      if value != flavor:
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' % (flavor, value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
383
384
def fix_python_path(cmd):
  """Returns the fixed command line to call the right python executable.

  Arguments:
  - cmd: command line as a list of strings. It is not modified.

  Returns a new list where a leading 'python' is replaced by sys.executable,
  or where sys.executable is prepended if the first item ends with '.py'. An
  empty command is returned as an empty list.
  """
  out = cmd[:]
  if not out:
    # Nothing to fix; avoids an IndexError on out[0] below.
    return out
  if out[0] == 'python':
    out[0] = sys.executable
  elif out[0].endswith('.py'):
    out.insert(0, sys.executable)
  return out
393
394
def url_open(url, data=None, retry_404=False, content_type=None):
  """Attempts to open the given url multiple times.

  The url is split into its server part (scheme and host), used to look up the
  HttpService to talk to, and the remainder, which is passed to
  HttpService.request().

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns a file-like object, where the response may be read from, or None
  if it was unable to connect.
  """
  parsed = list(urlparse.urlparse(url))
  scheme, netloc = parsed[0], parsed[1]
  server_url = '%s://%s' % (scheme, netloc)
  # Everything but the scheme and the host: path, params, query and fragment.
  request_url = urlparse.urlunparse(['', ''] + parsed[2:])
  return get_http_service(server_url).request(
      request_url, data, retry_404, content_type)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000412
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000413
def get_http_service(url):
  """Returns the HttpService instance that can send requests to the given base
  url, creating and caching it on first use.
  """
  with _http_services_lock:
    cached = _http_services.get(url)
    if cached:
      return cached
    created = AppEngineService(url)
    _http_services[url] = created
    return created
424
425
class HttpService(object):
  """Base class for a class that provides an API to HTTP based service:
    - Provides 'request' method.
    - Supports automatic request retries.
    - Supports persistent cookies.
    - Thread safe.
  """

  # File to use to store all auth cookies. '~' is expanded when the cookie jar
  # is loaded.
  COOKIE_FILE = os.path.join('~', '.isolated_cookies')

  # CookieJar reused by all services + lock that protects its instantiation.
  _cookie_jar = None
  _cookie_jar_lock = threading.Lock()

  def __init__(self, url):
    # Base url of the server, without trailing slash.
    self.url = str(url.rstrip('/'))
    self.opener = self.create_url_opener(self.load_cookie_jar())

  def authenticate(self):  # pylint: disable=R0201
    """Called when HTTP server asks client to authenticate.

    Returns True if authentication succeeded. This base implementation always
    returns False; it can be implemented in subclasses.
    """
    return False

  @staticmethod
  def load_cookie_jar():
    """Returns the global CookieJar object that stores cookies in the file.

    The jar is created and loaded on the first call, then reused.
    """
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is None:
        jar = ThreadSafeCookieJar(os.path.expanduser(HttpService.COOKIE_FILE))
        jar.load()
        HttpService._cookie_jar = jar
      return HttpService._cookie_jar

  @staticmethod
  def save_cookie_jar():
    """Called when cookie jar needs to be flushed to disk."""
    with HttpService._cookie_jar_lock:
      jar = HttpService._cookie_jar
      if jar is not None:
        jar.save()

  def create_url_opener(self, cookie_jar):  # pylint: disable=R0201
    """Returns OpenerDirector that will be used when sending requests.

    Can be reimplemented in subclasses.
    """
    opener = urllib2.OpenerDirector()
    handler_classes = (
        urllib2.ProxyHandler,
        urllib2.UnknownHandler,
        urllib2.HTTPHandler,
        urllib2.HTTPDefaultErrorHandler,
        urllib2.HTTPSHandler,
        urllib2.HTTPErrorProcessor,
    )
    for handler_class in handler_classes:
      opener.add_handler(handler_class())
    opener.add_handler(urllib2.HTTPCookieProcessor(cookie_jar))
    return opener

  def request(self, url, data=None, retry_404=False, content_type=None):
    """Attempts to open the given url multiple times.

    |url| is relative to the server root, i.e. '/some/request?param=1'.

    |data| can be either:
      -None for a GET request
      -str for pre-encoded data
      -list for data to be encoded
      -dict for data to be encoded (COUNT_KEY will be added in this case)

    Returns a file-like object, where the response may be read from, or None
    if it was unable to connect.
    """
    assert url and url[0] == '/'

    if isinstance(data, dict) and COUNT_KEY in data:
      logging.error('%s already existed in the data passed into UlrOpen. It '
                    'would be overwritten. Aborting UrlOpen', COUNT_KEY)
      return None

    method = 'GET' if data is None else 'POST'
    assert method == 'POST' or not content_type, (
        'Can\'t use content_type on GET')

    def make_request(extra):
      """Returns a urllib2.Request instance for this specific retry.

      |extra| holds the additional parameters to send with this attempt.
      """
      # Pre-encoded or missing data is sent untouched; |extra| then goes in the
      # query string instead of the body.
      passthrough = isinstance(data, str) or data is None
      if passthrough:
        payload = data
      else:
        pairs = data.items() if isinstance(data, dict) else data[:]
        pairs.extend(extra.iteritems())
        payload = urllib.urlencode(pairs)
      new_url = urlparse.urljoin(self.url, url.lstrip('/'))
      if passthrough:
        url_parts = list(urlparse.urlparse(new_url))
        # Append the query parameter.
        if url_parts[4] and extra:
          url_parts[4] += '&'
        url_parts[4] += urllib.urlencode(extra)
        new_url = urlparse.urlunparse(url_parts)
      request = urllib2.Request(new_url, data=payload)
      if payload is not None:
        if content_type:
          request.add_header('Content-Type', content_type)
        request.add_header('Content-Length', len(payload))
      return request

    return self._retry_loop(make_request, retry_404)

  def _retry_loop(self, make_request, retry_404=False):
    """Runs internal request-retry loop.

    |make_request| is called with the extra parameters of each attempt and must
    return the urllib2.Request to send.

    Returns the response on success, None once all MAX_URL_OPEN_ATTEMPTS
    attempts failed or when the error is not worth retrying.
    """
    authenticated = False
    last_error = None
    for attempt in range(MAX_URL_OPEN_ATTEMPTS):
      # The attempt count is only sent on retries.
      extra = {COUNT_KEY: attempt} if attempt else {}
      request = make_request(extra)
      try:
        response = self._url_open(request)
        logging.debug('url_open(%s) succeeded', request.get_full_url())
        return response
      except urllib2.HTTPError as e:
        if e.code in (302, 401, 403):
          # Unauthorized. Ask to authenticate and then try again. Try to
          # authenticate only once. If it doesn't help, then server does not
          # support app engine authentication.
          if not authenticated and self.authenticate():
            authenticated = True
            continue
          logging.error(
              'Unable to authenticate to %s.\n%s\n%s',
              request.get_full_url(), e, e.read())
          return None

        retryable = e.code >= 500 or (retry_404 and e.code == 404)
        if not retryable:
          # This HTTPError means we reached the server and there was a problem
          # with the request, so don't retry.
          logging.error(
              'Able to connect to %s but an exception was thrown.\n%s\n%s',
              request.get_full_url(), e, e.read())
          return None

        # The HTTPError was due to a server error, so retry the attempt.
        logging.warning('Able to connect to %s on attempt %d.\nException: %s ',
                        request.get_full_url(), attempt, e)
        last_error = e

      except (urllib2.URLError, httplib.HTTPException) as e:
        logging.warning('Unable to open url %s on attempt %d.\nException: %s',
                        request.get_full_url(), attempt, e)
        last_error = e

      # Only sleep if we are going to try again.
      if attempt != MAX_URL_OPEN_ATTEMPTS - 1:
        self._sleep_before_retry(attempt)

    logging.error('Unable to open given url, %s, after %d attempts.\n%s',
                  request.get_full_url(), MAX_URL_OPEN_ATTEMPTS, last_error)
    return None

  def _url_open(self, request):
    """Low level method to execute urllib2.Request's.

    To be mocked in tests.
    """
    return self.opener.open(request)

  def _sleep_before_retry(self, attempt):  # pylint: disable=R0201
    """Sleeps for some amount of time when retrying the request.

    The delay grows with |attempt|, includes a random component and is kept
    between 0.1 and 20 seconds. To be mocked in tests.
    """
    duration = random.random() * 3 + math.pow(1.5, (attempt + 1))
    time.sleep(min(20, max(0.1, duration)))
maruel@chromium.orgef333122013-03-12 20:36:40 +0000598
599
class AppEngineService(HttpService):
  """Implements authentication support for app engine based services."""

  # This lock ensures that user won't be confused with multiple concurrent
  # login prompts.
  _auth_lock = threading.Lock()

  def __init__(self, url, email=None, password=None):
    super(AppEngineService, self).__init__(url)
    self.email = email
    self.password = password
    # Created on demand by get_credentials().
    self._keyring = None

  def authenticate(self):
    """Authenticates in the app engine application.

    Returns True on success. Returns False right away if the 'upload' module
    could not be imported.
    """
    if not upload:
      logging.warning('\'upload\' module is missing, '
                      'app engine authentication is disabled.')
      return False

    # Captured by the class below, where 'self' is the RPC server instead.
    shared_opener = self.opener
    flush_cookies = self.save_cookie_jar

    class AuthServer(upload.AbstractRpcServer):
      """RPC server that reuses this service's url opener and cookie jar."""
      def _GetOpener(self):
        return shared_opener

      def PerformAuthentication(self):
        self._Authenticate()
        flush_cookies()
        return self.authenticated

    with AppEngineService._auth_lock:
      return AuthServer(self.url, self.get_credentials).PerformAuthentication()

  def get_credentials(self):
    """Called during authentication process to get the credentials.

    May be called multiple times if authentication fails.
    Returns tuple (email, password).
    """
    # 'authenticate' calls this only if 'upload' is present.
    # Ensure other callers (if any) fail non-cryptically if 'upload' is missing.
    assert upload, '\'upload\' module is required for this to work'
    if self.email and self.password:
      return (self.email, self.password)
    if not self._keyring:
      self._keyring = upload.KeyringCreds(
          self.url, self.url.lower(), self.email)
    return self._keyring.GetUserCredentials()
652
653
class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
  """MozillaCookieJar with thread safe load and save.

  Both operations are best effort: failures to read, create or write the
  cookie file are logged instead of being raised.
  """

  def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Loads cookies from the file if it exists.

    If the file doesn't exist, an empty one is created. In both cases the file
    mode is then set to 0600 so only its owner can access it.
    """
    filename = filename or self.filename
    with self._cookies_lock:
      if os.path.exists(filename):
        try:
          cookielib.MozillaCookieJar.load(self, filename,
                                          ignore_discard,
                                          ignore_expires)
          logging.debug('Loaded cookies from %s', filename)
        except (cookielib.LoadError, IOError) as e:
          # Not fatal: carry on with whatever was loaded.
          logging.warning('Failed to load cookies from %s: %s', filename, e)
      else:
        try:
          # The 0o prefix is the octal notation accepted by python 2.6+.
          fd = os.open(filename, os.O_CREAT, 0o600)
          os.close(fd)
        except OSError:
          logging.error('Failed to create %s', filename)
      try:
        os.chmod(filename, 0o600)
      except OSError:
        logging.error('Failed to fix mode for %s', filename)

  def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Saves cookies to the file, completely overwriting it."""
    # Resolve the effective path once so the messages below never print None.
    target = filename or self.filename
    logging.debug('Saving cookies to %s', target)
    with self._cookies_lock:
      try:
        cookielib.MozillaCookieJar.save(self, filename,
                                        ignore_discard,
                                        ignore_expires)
      except (IOError, OSError):
        # On python 2, a file that can't be opened for writing raises IOError,
        # which is not a subclass of OSError there.
        logging.error('Failed to save %s', target)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000690
691
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000692class ThreadPool(object):
693 """Implements a multithreaded worker pool oriented for mapping jobs with
694 thread-local result storage.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000695
696 Arguments:
697 - initial_threads: Number of threads to start immediately. Can be 0 if it is
698 uncertain that threads will be needed.
699 - max_threads: Maximum number of threads that will be started when all the
700 threads are busy working. Often the number of CPU cores.
701 - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
702 queue. A non-zero value may make add_task() blocking.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000703 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000704 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000705
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000706 def __init__(self, initial_threads, max_threads, queue_size):
707 logging.debug(
708 'ThreadPool(%d, %d, %d)', initial_threads, max_threads, queue_size)
709 assert initial_threads <= max_threads
710 # Update this check once 256 cores CPU are common.
711 assert max_threads <= 256
712
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000713 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000714 self._max_threads = max_threads
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000715
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000716 # Mutables.
717 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000718 self._num_of_added_tasks = 0
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000719 self._outputs_exceptions_cond = threading.Condition()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000720 self._outputs = []
721 self._exceptions = []
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000722 # Number of threads in wait state.
723 self._ready_lock = threading.Lock()
724 self._ready = 0
725 self._workers_lock = threading.Lock()
726 self._workers = []
727 for _ in range(initial_threads):
728 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000729
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000730 def _add_worker(self):
731 """Adds one worker thread if there isn't too many. Thread-safe."""
732 # Better to take the lock two times than hold it for too long.
733 with self._workers_lock:
734 if len(self._workers) >= self._max_threads:
735 return False
736 worker = threading.Thread(target=self._run)
737 with self._workers_lock:
738 if len(self._workers) >= self._max_threads:
739 return False
740 self._workers.append(worker)
741 worker.daemon = True
742 worker.start()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000743
maruel@chromium.org831958f2013-01-22 15:01:46 +0000744 def add_task(self, priority, func, *args, **kwargs):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000745 """Adds a task, a function to be executed by a worker.
746
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000747 |priority| can adjust the priority of the task versus others. Lower priority
maruel@chromium.org831958f2013-01-22 15:01:46 +0000748 takes precedence.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000749
maruel@chromium.orgedd25d02013-03-26 14:38:00 +0000750 |func| can either return a return value to be added to the output list or
751 be a generator which can emit multiple values.
752
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000753 Returns the index of the item added, e.g. the total number of enqueued items
754 up to now.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000755 """
maruel@chromium.org831958f2013-01-22 15:01:46 +0000756 assert isinstance(priority, int)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000757 assert callable(func)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000758 with self._ready_lock:
759 start_new_worker = not self._ready
760 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000761 self._num_of_added_tasks += 1
762 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000763 self.tasks.put((priority, index, func, args, kwargs))
764 if start_new_worker:
765 self._add_worker()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000766 return index
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000767
def _run(self):
  """Worker thread main loop; exits when a None task is dequeued.

  Every value produced by a task goes through self._output_append(). Any
  exception raised by a task is logged, stored as a sys.exc_info() tuple in
  self._exceptions and signalled on self._outputs_exceptions_cond.
  """
  while True:
    try:
      # Count this worker as idle only for the time it is blocked on the
      # queue.
      with self._ready_lock:
        self._ready += 1
      task = self.tasks.get()
    finally:
      with self._ready_lock:
        self._ready -= 1
    try:
      if task is None:
        # Stop sentinel. The finally clause below still marks it as done.
        return
      _priority, _index, func, args, kwargs = task
      if inspect.isgeneratorfunction(func):
        results = func(*args, **kwargs)
      else:
        results = (func(*args, **kwargs),)
      for out in results:
        self._output_append(out)
    except Exception as e:
      logging.warning('Caught exception: %s', e)
      exc_info = sys.exc_info()
      logging.info(''.join(traceback.format_tb(exc_info[2])))
      with self._outputs_exceptions_cond:
        self._exceptions.append(exc_info)
        self._outputs_exceptions_cond.notifyAll()
    finally:
      try:
        self.tasks.task_done()
      except Exception as e:
        # This is the root function of the thread: nothing higher up would
        # catch and report the error.
        logging.exception('Caught exception while marking task as done: %s',
                          e)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000807
def _output_append(self, out):
  """Stores |out| in the output list and wakes up waiters; None is ignored."""
  if out is None:
    return
  with self._outputs_exceptions_cond:
    self._outputs.append(out)
    self._outputs_exceptions_cond.notifyAll()
816
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000817 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000818 """Extracts all the results from each threads unordered.
819
820 Call repeatedly to extract all the exceptions if desired.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000821
822 Note: will wait for all work items to be done before returning an exception.
823 To get an exception early, use get_one_result().
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000824 """
825 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000826 self.tasks.join()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000827 self._outputs_exceptions_cond.acquire()
828 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000829 if self._exceptions:
830 e = self._exceptions.pop(0)
831 raise e[0], e[1], e[2]
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000832 out = self._outputs
833 self._outputs = []
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000834 finally:
835 self._outputs_exceptions_cond.release()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000836 return out
837
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000838 def get_one_result(self):
839 """Returns the next item that was generated or raises an exception if one
840 occured.
841
842 Warning: this function will hang if there is no work item left. Use join
843 instead.
844 """
845 self._outputs_exceptions_cond.acquire()
846 try:
847 while True:
848 if self._exceptions:
849 e = self._exceptions.pop(0)
850 raise e[0], e[1], e[2]
851 if self._outputs:
852 return self._outputs.pop(0)
853 self._outputs_exceptions_cond.wait()
854 finally:
855 self._outputs_exceptions_cond.release()
856
def close(self):
  """Stops every worker thread and waits for each of them to exit."""
  # One None sentinel per worker: a worker returns from its loop when it
  # dequeues None.
  remaining = len(self._workers)
  while remaining:
    self.tasks.put(None)
    remaining -= 1
  for worker in self._workers:
    worker.join()
864
def __enter__(self):
  """Context manager entry: returns this object as the 'with' target."""
  return self
868
def __exit__(self, _exc_type, _exc_value, _traceback):
  """Context manager exit: closes the pool whether or not an exception is
  in flight. The exception, if any, is not suppressed.
  """
  self.close()
872
873
def valid_file(filepath, size):
  """Returns True if |filepath| looks valid.

  Currently only the size on disk is compared against |size|. When |size| is
  UNKNOWN_FILE_SIZE nothing can be checked and the file is considered valid.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  size_on_disk = os.stat(filepath).st_size
  if size == size_on_disk:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), size_on_disk, size)
  return False
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000886
887
class Profiler(object):
  """Context manager that logs the wall-clock time spent in its body."""

  def __init__(self, name):
    # Label used in the log line.
    self.name = name
    # Set by __enter__(); None until the section is started.
    self.start_time = None

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
901
902
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priority of items is equals, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The low 8 bits are left free for the internal retry counter.
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  # Maximum number of times an item is re-queued after an IOError.
  RETRIES = 5

  def __init__(self, destination_root):
    # Function to fetch a remote object or upload to a remote location.
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = ThreadPool(self.INITIAL_WORKERS, self.MAX_WORKERS, 0)

  def join(self):
    """Blocks until the queue is empty."""
    return self._pool.join()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first. |priority| must not use the
    bits reserved by INTERNAL_PRIORITY_BITS.

    Thread-safe.
    """
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Queues the transfer of |obj| to |dest| on the thread pool.

    Unlike add_item(), |priority| may carry a retry count in its low bits.
    """
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns the next completed item; see ThreadPool.get_one_result()."""
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success. Returns None when the item was re-queued for a
    retry. Re-raises the IOError once RETRIES attempts were made.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Retry a few times, lowering the priority. The retry count lives in
      # the low bits of the priority.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a function(item, dest) that retrieves |item| into |dest|.

    For an http(s) |file_or_url|, the function downloads and zlib-decompresses
    the item; otherwise it copies the item from the |file_or_url| directory.
    """
    if re.match(r'^https?://.+$', file_or_url):
      def download_file(item, dest):
        # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
        # easy.
        # Defined before the try block so the zlib.error handler can always
        # report them.
        chunk = ''
        size = 0
        try:
          zipped_source = file_or_url + item
          logging.debug('download_file(%s)', zipped_source)

          # Because the app engine DB is only eventually consistent, retry
          # 404 errors because the file might just not be visible yet (even
          # though it has been uploaded).
          connection = url_open(zipped_source, retry_404=True)
          if not connection:
            raise IOError('Unable to open connection to %s' % zipped_source)
          decompressor = zlib.decompressobj()
          with open(dest, 'wb') as f:
            while True:
              chunk = connection.read(ZIPPED_FILE_CHUNK)
              if not chunk:
                break
              size += len(chunk)
              f.write(decompressor.decompress(chunk))
          # Ensure that all the data was properly decompressed.
          uncompressed_data = decompressor.flush()
          assert not uncompressed_data
        except IOError:
          logging.error('Encountered an exception with (%s, %s)', item, dest)
          raise
        except httplib.HTTPException as e:
          raise IOError('Encountered an HTTPException.\n%s' % e)
        except zlib.error as e:
          # Log the first bytes of the offending chunk to see if it's
          # uncompressed data. (Slicing the exception object itself does not
          # give access to the data.)
          logging.warning('%r', chunk[:512])
          raise IOError(
              'Problem unzipping data for item %s. Got %d bytes.\n%s' %
              (item, size, e))

      return download_file

    def copy_file(item, dest):
      source = os.path.join(file_or_url, item)
      if source == dest:
        logging.info('Source and destination are the same, no action required')
        return
      logging.debug('copy_file(%s, %s)', source, dest)
      shutil.copy(source, dest)
    return copy_file
1022
1023
class CachePolicies(object):
  """Plain holder for the cache retention limits."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
1038
1039
class NoCache(object):
  """This class is intended to be usable everywhere the Cache class is.
  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    # Directory the items are downloaded into.
    self.target_directory = target_directory
    # Object used to fetch items; must provide add_item() and
    # get_one_result().
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Gets the requested file, blocking until one result is available."""
    self.remote.add_item(priority, item, self.path(item), size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Downloads the first item of |items| if it is missing and returns it.

    |items| may be any non-empty iterable of item names (iterating a dict
    yields its keys), like the argument of Cache.wait_for().
    """
    item = next(iter(items))

    if not os.path.exists(self.path(item)):
      self.remote.add_item(Remote.MED, item, self.path(item), UNKNOWN_FILE_SIZE)
      downloaded = self.remote.get_one_result()
      assert downloaded == item

    return item

  def path(self, item):
    """Returns the path of |item| inside the target directory."""
    return os.path.join(self.target_directory, item)
1068
1069
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    self._state_need_to_be_saved = False
    # A lookup map to speed up searching.
    self._lookup = {}
    self._lookup_is_stale = True

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    with Profiler('Setup'):
      if not os.path.isdir(self.cache_dir):
        os.makedirs(self.cache_dir)
      if os.path.isfile(self.state_file):
        try:
          # Close the file deterministically instead of relying on garbage
          # collection.
          with open(self.state_file, 'r') as f:
            self.state = json.load(f)
        except (IOError, ValueError) as e:
          # Too bad. The file will be overwritten and the cache cleared.
          logging.error(
              'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
          self._state_need_to_be_saved = True
        if (not isinstance(self.state, list) or
            not all(
              isinstance(i, (list, tuple)) and len(i) == 2
              for i in self.state)):
          # Discard.
          self._state_need_to_be_saved = True
          self.state = []

      # Ensure that all files listed in the state still exist and add new ones.
      previous = set(filename for filename, _ in self.state)
      if len(previous) != len(self.state):
        logging.warn('Cache state is corrupted, found duplicate files')
        self._state_need_to_be_saved = True
        self.state = []
        # Nothing is tracked anymore, so every file on disk must go through
        # the 'untracked file' path below; otherwise files named in the old
        # state would be skipped and stay on disk without being tracked.
        previous = set()

      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        if not RE_IS_SHA1.match(filename):
          logging.warn('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
          continue
        # Insert as the oldest file. It will be deleted eventually if not
        # accessed.
        self._add(filename, False)
        logging.warn('Add unknown file %s to cache', filename)
        added += 1

      if added:
        logging.warn('Added back %d unknown files', added)
      if previous:
        # What is left in |previous| was listed in the state but not found on
        # disk.
        logging.warn('Removed %d lost files', len(previous))
        # Set explicitly in case self._add() wasn't called.
        self._state_need_to_be_saved = True
        # Filter out entries that were not found while keeping the previous
        # order.
        self.state = [
          (filename, size) for filename, size in self.state
          if filename not in previous
        ]
      self.trim()

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Trims the cache and logs statistics about this session."""
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%5d (%8dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('       %8dkb free', self._free_disk / 1024)

  def remove_file_at_index(self, index):
    """Removes the file at the given index.

    The entry is dropped from the state even if deleting the file fails; the
    OSError is logged, not raised.
    """
    try:
      self._state_need_to_be_saved = True
      filename, size = self.state.pop(index)
      # If the lookup was already stale, its possible the filename was not
      # present yet.
      self._lookup_is_stale = True
      self._lookup.pop(filename, None)
      self._removed.append(size)
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)

  def remove_lru_file(self):
    """Removes the least recently used file."""
    self.remove_file_at_index(0)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    # Ensure maximum cache size.
    if self.policies.max_cache_size and self.state:
      # Keep a running total instead of summing the whole state again after
      # each eviction.
      total_size = sum(i[1] for i in self.state)
      while self.state and total_size > self.policies.max_cache_size:
        total_size -= self.state[0][1]
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)

    self.save()

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verify that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.
    """
    assert not '/' in item
    path = self.path(item)
    self._update_lookup()
    index = self._lookup.get(item)

    if index is not None:
      if not valid_file(self.path(item), size):
        self.remove_file_at_index(index)
        index = None
      else:
        assert index < len(self.state)
        # Was already in cache. Update its LRU value by putting it at the end.
        self._state_need_to_be_saved = True
        self._lookup_is_stale = True
        self.state.append(self.state.pop(index))

    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.add_item(priority, item, path, size)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    self._update_lookup()
    if not obj in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering, if it changed since the last save."""
    if self._state_need_to_be_saved:
      # The 'with' block guarantees the state is flushed and the handle closed
      # before the flag is reset.
      with open(self.state_file, 'wb') as f:
        json.dump(self.state, f, separators=(',',':'))
      self._state_need_to_be_saved = False

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    self._update_lookup()
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_one_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    self._state_need_to_be_saved = True
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self._lookup_is_stale = True
      self.state.insert(0, (item, size))

  def _update_lookup(self):
    """Rebuilds the filename -> index map if it was marked stale."""
    if self._lookup_is_stale:
      self._lookup = dict(
          (filename, index) for index, (filename, _) in enumerate(self.state))
      self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001313
1314
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False
    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| with load_isolated() and creates one child
    IsolatedFile per entry of its 'includes' list.

    Must be called at most once per instance.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included = self.data.get('includes', [])
    self.children = [IsolatedFile(child_hash) for child_hash in included]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function. Does nothing if this
    .isolated file is not loaded yet or if its files were already requested.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overridden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1369
1370
class Settings(object):
  """Results of a completely parsed .isolated file."""

  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Raises ConfigError if an .isolated file is included more than once.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Keeps the hashes of all the .isolated files requested so far, to refuse
    # recursive includes.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node defining a property wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file as soon as its content is read.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        # Record the include; without this only an include of the root itself
        # would ever be detected and an include cycle would loop forever.
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)
    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1452
1453
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files.

  Arguments:
    base_directory: existing directory under which the tree is created.
    files: collection of relative file paths; iterating a dict yields its
        keys. It must support len() since its size is logged.

  Every ancestor directory of every file is created, parents before children.
  Directories that already exist are left untouched, so populating a
  destination that was already partially set up does not fail.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collects the directory of each file along with all of its ancestors.
  directories = set()
  for f in files:
    item = os.path.dirname(f)
    # Stop as soon as a directory was already visited: all of its ancestors
    # were recorded at that time. This also ends the walk on a filesystem
    # root, where os.path.dirname() returns its argument unchanged.
    while item and item not in directories:
      directories.add(item)
      item = os.path.dirname(item)
  # A path always sorts before the paths it is a prefix of, so each parent is
  # created before its children.
  for d in sorted(directories):
    path = os.path.join(base_directory, d)
    if not os.path.isdir(path):
      os.mkdir(path)
1466
1467
def create_links(base_directory, files):
  """Creates the symlinks described by the given file entries.

  Arguments:
    base_directory: directory the link paths are relative to.
    files: iterable of (relative path, properties) pairs. Only entries whose
        properties contain an 'l' key, the link target, are acted upon.

  When an entry also has an 'm' key, it is applied to the link itself through
  os.lchmod() on platforms providing that function. On Windows, links are
  skipped with a warning.
  """
  on_windows = sys.platform == 'win32'
  for relpath, props in files:
    if 'l' in props:
      if on_windows:
        # TODO(maruel): Create junctions or empty text files similar to what
        # cygwin does?
        logging.warning('Ignoring symlink %s', relpath)
      else:
        link_path = os.path.join(base_directory, relpath)
        # symlink doesn't exist on Windows. So the 'link' property should
        # never be specified for windows .isolated file.
        os.symlink(props['l'], link_path)  # pylint: disable=E1101
        if 'm' in props:
          # os.lchmod() is not available everywhere; do nothing without it.
          lchmod = getattr(os, 'lchmod', None)
          if lchmod:
            lchmod(link_path, props['m'])
1486
1487
def setup_commands(base_directory, cwd, cmd):
  """Prepares the working directory and the command used to run the test.

  Arguments:
    base_directory: directory the relative cwd is resolved against.
    cwd: working directory, which must be a relative path. It is created when
        it does not exist yet.
    cmd: command line as a list. Its first element is rewritten in place to
        use the OS path separator before the list goes through
        fix_python_path().

  Returns:
    Tuple (absolute-ized working directory, adjusted command).
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  work_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(work_dir):
    os.makedirs(work_dir)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0]
  cmd[0] = executable.replace('/', os.path.sep)

  return work_dir, fix_python_path(cmd)
1502
1503
def generate_remaining_files(files):
  """Groups the files that still need their content by content hash.

  Arguments:
    files: iterable of (file path, properties) pairs.

  Returns:
    Dict mapping each 'h' property value to the list of (file path,
    properties) tuples carrying it, in iteration order. Entries without an
    'h' property are left out.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))

  return by_hash
1512
1513
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies of an isolated test to the given directory.

  Arguments:
    isolated_hash: identifies the root .isolated file to load.
    target_directory: destination directory; created when it does not exist.
    remote: passed to Remote() to describe where the items are fetched from.

  Returns:
    0 on success, 1 when the loaded settings define no command.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds'):
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    sys.stderr.write('No command to run\n')
    return 1

  with Profiler('GetRest'):
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    # A copy of the command is passed since setup_commands() edits its
    # argument in place.
    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      obj = no_cache.wait_for(remaining)
      entries = remaining.pop(obj)
      last_index = len(entries) - 1

      for index, (filepath, properties) in enumerate(entries):
        outfile = os.path.join(target_directory, filepath)
        logging.info(no_cache.path(obj))

        # The fetched item is copied for every file sharing this content
        # except the last one, which takes it over through a rename.
        if index == last_index:
          os.rename(no_cache.path(obj), outfile)
        else:
          shutil.copyfile(no_cache.path(obj), outfile)

        if sys.platform != 'win32' and 'm' in properties:
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in
          # that case.
          os.chmod(outfile, properties['m'])

      now = time.time()
      if now - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...' % len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1571
1572
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
    isolated_hash: either a value matching RE_IS_SHA1, or the path of a local
        .isolated file. A local file is hashed with SHA-1 and added to the
        cache under that hash before being loaded.
    cache_dir: directory of the local cache; the temporary directory is
        requested from make_temp_dir() with this value as second argument.
    remote: passed to Remote() to describe where the items are fetched from.
    policies: cache policies handed to Cache().

  Returns:
    1 when the loaded settings define no command, otherwise the value
    returned by subprocess.call() for the command.

  Raises:
    OSError: re-raised after a message on stderr when the command cannot be
        started.

  The temporary directory is removed in all cases.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds'):
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # The file is read inside a 'with' block so its handle is closed
          # right away instead of whenever the object gets collected.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        sys.stderr.write('No command to run\n')
        return 1

      with Profiler('GetRest'):
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        # A copy of the command is passed since setup_commands() edits its
        # argument in place.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...', len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s', cmd, cwd)

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest'):
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        sys.stderr.write('Failed to run %s; cwd=%s\n' % (cmd, cwd))
        raise
    finally:
      rmtree(outdir)
1637
1638
def main():
  """Parses the command line, sets up logging and downloads or runs the test.

  Exactly one of --isolated or --hash must be given and no positional
  argument is accepted; otherwise parser.error() is called. With --download,
  the files are only downloaded to the requested directory; without it, the
  test is run through run_tha_test().

  Returns:
    The value returned by download_test_data() or run_tha_test(), or 1 when
    either of them raised an exception, which is logged.
  """
  disable_buffering()
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  # NOTE(review): the value of --no-run is not read anywhere in this function.
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  group = optparse.OptionGroup(parser, 'Download')
  group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  # Registered once all of its options are defined.
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(group)

  options, args = parser.parse_args()
  # Any count of -v above two maps to DEBUG.
  level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]

  # The console only shows what the verbosity asks for.
  logging_console = logging.StreamHandler()
  logging_console.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  logging_console.setLevel(level)
  logging.getLogger().addHandler(logging_console)

  # The rotating log file always records everything down to DEBUG.
  logging_rotating_file = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  logging_rotating_file.setLevel(logging.DEBUG)
  logging_rotating_file.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  logging.getLogger().addHandler(logging_rotating_file)

  # The root logger lets everything through; each handler filters on its own.
  logging.getLogger().setLevel(logging.DEBUG)

  # Usage errors are logged before parser.error() is called, so that they
  # also reach the log file.
  if bool(options.isolated) == bool(options.hash):
    logging.debug('One and only one of --isolated or --hash is required.')
    parser.error('One and only one of --isolated or --hash is required.')
  if args:
    logging.debug('Unsupported args %s' % ' '.join(args))
    parser.error('Unsupported args %s' % ' '.join(args))

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  try:
    if options.download:
      return download_test_data(options.isolated or options.hash,
                                options.download, options.remote)
    return run_tha_test(
        options.isolated or options.hash,
        options.cache,
        options.remote,
        policies)
  except Exception as e:
    # Make sure any exception is logged, whichever mode was requested.
    logging.exception(e)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001741
1742
if __name__ == '__main__':
  # The default encoding has to be fixed up before anything else runs, so
  # that the script always executes with the correct one.
  fix_default_encoding()
  exit_code = main()
  sys.exit(exit_code)